Linux-libre 5.7.6-gnu: drivers/net/ethernet/emulex/benet/be_main.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2005 - 2016 Broadcom
4  * All rights reserved.
5  *
6  * Contact Information:
7  * linux-drivers@emulex.com
8  *
9  * Emulex
10  * 3333 Susan Street
11  * Costa Mesa, CA 92626
12  */
13
14 #include <linux/prefetch.h>
15 #include <linux/module.h>
16 #include "be.h"
17 #include "be_cmds.h"
18 #include <asm/div64.h>
19 #include <linux/aer.h>
20 #include <linux/if_bridge.h>
21 #include <net/busy_poll.h>
22 #include <net/vxlan.h>
23
24 MODULE_DESCRIPTION(DRV_DESC);
25 MODULE_AUTHOR("Emulex Corporation");
26 MODULE_LICENSE("GPL");
27
28 /* num_vfs module param is obsolete.
29  * Use the sysfs interface to enable/disable VFs.
30  */
31 static unsigned int num_vfs;
32 module_param(num_vfs, uint, 0444);
33 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
34
35 static ushort rx_frag_size = 2048;
36 module_param(rx_frag_size, ushort, 0444);
37 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds received data.");
38
39 /* Per-module error detection/recovery workq shared across all functions.
40  * Each function schedules its own work request on this shared workq.
41  */
42 static struct workqueue_struct *be_err_recovery_workq;
43
44 static const struct pci_device_id be_dev_ids[] = {
45 #ifdef CONFIG_BE2NET_BE2
46         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
47         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
48 #endif /* CONFIG_BE2NET_BE2 */
49 #ifdef CONFIG_BE2NET_BE3
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
51         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
52 #endif /* CONFIG_BE2NET_BE3 */
53 #ifdef CONFIG_BE2NET_LANCER
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56 #endif /* CONFIG_BE2NET_LANCER */
57 #ifdef CONFIG_BE2NET_SKYHAWK
58         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
59         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
60 #endif /* CONFIG_BE2NET_SKYHAWK */
61         { 0 }
62 };
63 MODULE_DEVICE_TABLE(pci, be_dev_ids);
64
65 /* Workqueue used by all functions for deferring cmd calls to the adapter */
66 static struct workqueue_struct *be_wq;
67
68 /* UE Status Low CSR */
69 static const char * const ue_status_low_desc[] = {
70         "CEV",
71         "CTX",
72         "DBUF",
73         "ERX",
74         "Host",
75         "MPU",
76         "NDMA",
77         "PTC ",
78         "RDMA ",
79         "RXF ",
80         "RXIPS ",
81         "RXULP0 ",
82         "RXULP1 ",
83         "RXULP2 ",
84         "TIM ",
85         "TPOST ",
86         "TPRE ",
87         "TXIPS ",
88         "TXULP0 ",
89         "TXULP1 ",
90         "UC ",
91         "WDMA ",
92         "TXULP2 ",
93         "HOST1 ",
94         "P0_OB_LINK ",
95         "P1_OB_LINK ",
96         "HOST_GPIO ",
97         "MBOX ",
98         "ERX2 ",
99         "SPARE ",
100         "JTAG ",
101         "MPU_INTPEND "
102 };
103
104 /* UE Status High CSR */
105 static const char * const ue_status_hi_desc[] = {
106         "LPCMEMHOST",
107         "MGMT_MAC",
108         "PCS0ONLINE",
109         "MPU_IRAM",
110         "PCS1ONLINE",
111         "PCTL0",
112         "PCTL1",
113         "PMEM",
114         "RR",
115         "TXPB",
116         "RXPP",
117         "XAUI",
118         "TXP",
119         "ARM",
120         "IPC",
121         "HOST2",
122         "HOST3",
123         "HOST4",
124         "HOST5",
125         "HOST6",
126         "HOST7",
127         "ECRC",
128         "Poison TLP",
129         "NETC",
130         "PERIPH",
131         "LLTXULP",
132         "D2P",
133         "RCON",
134         "LDMA",
135         "LLTXP",
136         "LLTXPB",
137         "Unknown"
138 };
139
140 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
141                                  BE_IF_FLAGS_BROADCAST | \
142                                  BE_IF_FLAGS_MULTICAST | \
143                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
144
145 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
146 {
147         struct be_dma_mem *mem = &q->dma_mem;
148
149         if (mem->va) {
150                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
151                                   mem->dma);
152                 mem->va = NULL;
153         }
154 }
155
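/* Allocate DMA-coherent memory for a queue of 'len' entries of
 * 'entry_size' bytes each and record it in the queue's dma_mem.
 */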
156 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
157                           u16 len, u16 entry_size)
158 {
159         struct be_dma_mem *mem = &q->dma_mem;
160
161         memset(q, 0, sizeof(*q));
162         q->len = len;
163         q->entry_size = entry_size;
164         mem->size = len * entry_size;
165         mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
166                                      &mem->dma, GFP_KERNEL);
167         if (!mem->va)
168                 return -ENOMEM;
169         return 0;
170 }
171
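/* Enable/disable host interrupts by toggling the HOSTINTR bit of the
 * MEMBAR control register in PCI config space. Used as a fallback when
 * the FW cmd issued in be_intr_set() fails.
 */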
172 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
173 {
174         u32 reg, enabled;
175
176         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
177                               &reg);
178         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179
180         if (!enabled && enable)
181                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
182         else if (enabled && !enable)
183                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
184         else
185                 return;
186
187         pci_write_config_dword(adapter->pdev,
188                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
189 }
190
191 static void be_intr_set(struct be_adapter *adapter, bool enable)
192 {
193         int status = 0;
194
195         /* On Lancer chips, interrupts can't be controlled via this register */
196         if (lancer_chip(adapter))
197                 return;
198
199         if (be_check_error(adapter, BE_ERROR_EEH))
200                 return;
201
202         status = be_cmd_intr_set(adapter, enable);
203         if (status)
204                 be_reg_intr_set(adapter, enable);
205 }
206
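/* Ring the RX queue doorbell to notify HW of 'posted' newly posted
 * receive buffers.
 */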
207 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
208 {
209         u32 val = 0;
210
211         if (be_check_error(adapter, BE_ERROR_HW))
212                 return;
213
214         val |= qid & DB_RQ_RING_ID_MASK;
215         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
216
217         wmb();
218         iowrite32(val, adapter->db + DB_RQ_OFFSET);
219 }
220
221 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
222                           u16 posted)
223 {
224         u32 val = 0;
225
226         if (be_check_error(adapter, BE_ERROR_HW))
227                 return;
228
229         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
230         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
231
232         wmb();
233         iowrite32(val, adapter->db + txo->db_offset);
234 }
235
236 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
237                          bool arm, bool clear_int, u16 num_popped,
238                          u32 eq_delay_mult_enc)
239 {
240         u32 val = 0;
241
242         val |= qid & DB_EQ_RING_ID_MASK;
243         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
244
245         if (be_check_error(adapter, BE_ERROR_HW))
246                 return;
247
248         if (arm)
249                 val |= 1 << DB_EQ_REARM_SHIFT;
250         if (clear_int)
251                 val |= 1 << DB_EQ_CLR_SHIFT;
252         val |= 1 << DB_EQ_EVNT_SHIFT;
253         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
254         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
255         iowrite32(val, adapter->db + DB_EQ_OFFSET);
256 }
257
258 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
259 {
260         u32 val = 0;
261
262         val |= qid & DB_CQ_RING_ID_MASK;
263         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
264                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
265
266         if (be_check_error(adapter, BE_ERROR_HW))
267                 return;
268
269         if (arm)
270                 val |= 1 << DB_CQ_REARM_SHIFT;
271         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
272         iowrite32(val, adapter->db + DB_CQ_OFFSET);
273 }
274
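/* Program 'mac' as the primary MAC of the interface. If the MAC is
 * already present in the uc-list, reuse its pmac_id instead of adding
 * a new filter entry.
 */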
275 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
276 {
277         int i;
278
279         /* Check if mac has already been added as part of uc-list */
280         for (i = 0; i < adapter->uc_macs; i++) {
281                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
282                         /* mac already added, skip addition */
283                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
284                         return 0;
285                 }
286         }
287
288         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
289                                &adapter->pmac_id[0], 0);
290 }
291
292 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
293 {
294         int i;
295
296         /* Skip deletion if the programmed mac is
297          * being used in uc-list
298          */
299         for (i = 0; i < adapter->uc_macs; i++) {
300                 if (adapter->pmac_id[i + 1] == pmac_id)
301                         return;
302         }
303         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
304 }
305
306 static int be_mac_addr_set(struct net_device *netdev, void *p)
307 {
308         struct be_adapter *adapter = netdev_priv(netdev);
309         struct device *dev = &adapter->pdev->dev;
310         struct sockaddr *addr = p;
311         int status;
312         u8 mac[ETH_ALEN];
313         u32 old_pmac_id = adapter->pmac_id[0];
314
315         if (!is_valid_ether_addr(addr->sa_data))
316                 return -EADDRNOTAVAIL;
317
318         /* Proceed further only if the user-provided MAC is different
319          * from the active MAC
320          */
321         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
322                 return 0;
323
324         /* BE3 VFs without FILTMGMT privilege are not allowed to set their
325          * MAC address
326          */
327         if (BEx_chip(adapter) && be_virtfn(adapter) &&
328             !check_privilege(adapter, BE_PRIV_FILTMGMT))
329                 return -EPERM;
330
331         /* if device is not running, copy MAC to netdev->dev_addr */
332         if (!netif_running(netdev))
333                 goto done;
334
335         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
336          * privilege or if the PF did not provision the new MAC address.
337          * On BE3, this cmd will always fail if the VF doesn't have the
338          * FILTMGMT privilege. This failure is OK only if the PF had
339          * programmed the MAC for the VF.
340          */
341         mutex_lock(&adapter->rx_filter_lock);
342         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
343         if (!status) {
344
345                 /* Delete the old programmed MAC. This call may fail if the
346                  * old MAC was already deleted by the PF driver.
347                  */
348                 if (adapter->pmac_id[0] != old_pmac_id)
349                         be_dev_mac_del(adapter, old_pmac_id);
350         }
351
352         mutex_unlock(&adapter->rx_filter_lock);
353         /* Decide whether the new MAC was successfully activated only
354          * after querying the FW
355          */
356         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
357                                        adapter->if_handle, true, 0);
358         if (status)
359                 goto err;
360
361         /* The MAC change did not happen, either due to lack of privilege
362          * or because the PF did not pre-provision the new MAC.
363          */
364         if (!ether_addr_equal(addr->sa_data, mac)) {
365                 status = -EPERM;
366                 goto err;
367         }
368
369         /* Remember currently programmed MAC */
370         ether_addr_copy(adapter->dev_mac, addr->sa_data);
371 done:
372         ether_addr_copy(netdev->dev_addr, addr->sa_data);
373         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
374         return 0;
375 err:
376         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
377         return status;
378 }
379
380 /* BE2 supports only v0 cmd */
381 static void *hw_stats_from_cmd(struct be_adapter *adapter)
382 {
383         if (BE2_chip(adapter)) {
384                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
385
386                 return &cmd->hw_stats;
387         } else if (BE3_chip(adapter)) {
388                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
389
390                 return &cmd->hw_stats;
391         } else {
392                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
393
394                 return &cmd->hw_stats;
395         }
396 }
397
398 /* BE2 supports only v0 cmd */
399 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
400 {
401         if (BE2_chip(adapter)) {
402                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
403
404                 return &hw_stats->erx;
405         } else if (BE3_chip(adapter)) {
406                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
407
408                 return &hw_stats->erx;
409         } else {
410                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
411
412                 return &hw_stats->erx;
413         }
414 }
415
416 static void populate_be_v0_stats(struct be_adapter *adapter)
417 {
418         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
419         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
420         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
421         struct be_port_rxf_stats_v0 *port_stats =
422                                         &rxf_stats->port[adapter->port_num];
423         struct be_drv_stats *drvs = &adapter->drv_stats;
424
425         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
426         drvs->rx_pause_frames = port_stats->rx_pause_frames;
427         drvs->rx_crc_errors = port_stats->rx_crc_errors;
428         drvs->rx_control_frames = port_stats->rx_control_frames;
429         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
430         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
431         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
432         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
433         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
434         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
435         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
436         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
437         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
438         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
439         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
440         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
441         drvs->rx_dropped_header_too_small =
442                 port_stats->rx_dropped_header_too_small;
443         drvs->rx_address_filtered =
444                                         port_stats->rx_address_filtered +
445                                         port_stats->rx_vlan_filtered;
446         drvs->rx_alignment_symbol_errors =
447                 port_stats->rx_alignment_symbol_errors;
448
449         drvs->tx_pauseframes = port_stats->tx_pauseframes;
450         drvs->tx_controlframes = port_stats->tx_controlframes;
451
452         if (adapter->port_num)
453                 drvs->jabber_events = rxf_stats->port1_jabber_events;
454         else
455                 drvs->jabber_events = rxf_stats->port0_jabber_events;
456         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
457         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
458         drvs->forwarded_packets = rxf_stats->forwarded_packets;
459         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
460         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
461         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
462         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
463 }
464
465 static void populate_be_v1_stats(struct be_adapter *adapter)
466 {
467         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
468         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
469         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
470         struct be_port_rxf_stats_v1 *port_stats =
471                                         &rxf_stats->port[adapter->port_num];
472         struct be_drv_stats *drvs = &adapter->drv_stats;
473
474         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
475         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
476         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
477         drvs->rx_pause_frames = port_stats->rx_pause_frames;
478         drvs->rx_crc_errors = port_stats->rx_crc_errors;
479         drvs->rx_control_frames = port_stats->rx_control_frames;
480         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
481         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
482         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
483         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
484         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
485         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
486         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
487         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
488         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
489         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
490         drvs->rx_dropped_header_too_small =
491                 port_stats->rx_dropped_header_too_small;
492         drvs->rx_input_fifo_overflow_drop =
493                 port_stats->rx_input_fifo_overflow_drop;
494         drvs->rx_address_filtered = port_stats->rx_address_filtered;
495         drvs->rx_alignment_symbol_errors =
496                 port_stats->rx_alignment_symbol_errors;
497         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
498         drvs->tx_pauseframes = port_stats->tx_pauseframes;
499         drvs->tx_controlframes = port_stats->tx_controlframes;
500         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
501         drvs->jabber_events = port_stats->jabber_events;
502         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
503         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
504         drvs->forwarded_packets = rxf_stats->forwarded_packets;
505         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
506         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
507         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
508         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
509 }
510
511 static void populate_be_v2_stats(struct be_adapter *adapter)
512 {
513         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
514         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
515         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
516         struct be_port_rxf_stats_v2 *port_stats =
517                                         &rxf_stats->port[adapter->port_num];
518         struct be_drv_stats *drvs = &adapter->drv_stats;
519
520         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
521         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
522         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
523         drvs->rx_pause_frames = port_stats->rx_pause_frames;
524         drvs->rx_crc_errors = port_stats->rx_crc_errors;
525         drvs->rx_control_frames = port_stats->rx_control_frames;
526         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
527         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
528         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
529         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
530         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
531         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
532         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
533         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
534         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
535         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
536         drvs->rx_dropped_header_too_small =
537                 port_stats->rx_dropped_header_too_small;
538         drvs->rx_input_fifo_overflow_drop =
539                 port_stats->rx_input_fifo_overflow_drop;
540         drvs->rx_address_filtered = port_stats->rx_address_filtered;
541         drvs->rx_alignment_symbol_errors =
542                 port_stats->rx_alignment_symbol_errors;
543         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
544         drvs->tx_pauseframes = port_stats->tx_pauseframes;
545         drvs->tx_controlframes = port_stats->tx_controlframes;
546         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
547         drvs->jabber_events = port_stats->jabber_events;
548         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
549         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
550         drvs->forwarded_packets = rxf_stats->forwarded_packets;
551         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
552         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
553         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
554         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
555         if (be_roce_supported(adapter)) {
556                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
557                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
558                 drvs->rx_roce_frames = port_stats->roce_frames_received;
559                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
560                 drvs->roce_drops_payload_len =
561                         port_stats->roce_drops_payload_len;
562         }
563 }
564
565 static void populate_lancer_stats(struct be_adapter *adapter)
566 {
567         struct be_drv_stats *drvs = &adapter->drv_stats;
568         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
569
570         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
571         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
572         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
573         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
574         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
575         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
576         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
577         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
578         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
579         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
580         drvs->rx_dropped_tcp_length =
581                                 pport_stats->rx_dropped_invalid_tcp_length;
582         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
583         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
584         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
585         drvs->rx_dropped_header_too_small =
586                                 pport_stats->rx_dropped_header_too_small;
587         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
588         drvs->rx_address_filtered =
589                                         pport_stats->rx_address_filtered +
590                                         pport_stats->rx_vlan_filtered;
591         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
592         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
593         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
594         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
595         drvs->jabber_events = pport_stats->rx_jabbers;
596         drvs->forwarded_packets = pport_stats->num_forwards_lo;
597         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
598         drvs->rx_drops_too_many_frags =
599                                 pport_stats->rx_drops_too_many_frags_lo;
600 }
601
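/* Accumulate a 16-bit HW counter, which wraps at 65535, into a 32-bit
 * SW accumulator; assumes at most one wrap since the previous update.
 */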
602 static void accumulate_16bit_val(u32 *acc, u16 val)
603 {
604 #define lo(x)                   (x & 0xFFFF)
605 #define hi(x)                   (x & 0xFFFF0000)
606         bool wrapped = val < lo(*acc);
607         u32 newacc = hi(*acc) + val;
608
609         if (wrapped)
610                 newacc += 65536;
611         WRITE_ONCE(*acc, newacc);
612 }
613
614 static void populate_erx_stats(struct be_adapter *adapter,
615                                struct be_rx_obj *rxo, u32 erx_stat)
616 {
617         if (!BEx_chip(adapter))
618                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
619         else
620                 /* the erx HW counter below can wrap around after 65535;
621                  * the driver accumulates it into a 32-bit value
622                  */
623                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
624                                      (u16)erx_stat);
625 }
626
627 void be_parse_stats(struct be_adapter *adapter)
628 {
629         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
630         struct be_rx_obj *rxo;
631         int i;
632         u32 erx_stat;
633
634         if (lancer_chip(adapter)) {
635                 populate_lancer_stats(adapter);
636         } else {
637                 if (BE2_chip(adapter))
638                         populate_be_v0_stats(adapter);
639                 else if (BE3_chip(adapter))
640                         /* for BE3 */
641                         populate_be_v1_stats(adapter);
642                 else
643                         populate_be_v2_stats(adapter);
644
645                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
646                 for_all_rx_queues(adapter, rxo, i) {
647                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
648                         populate_erx_stats(adapter, rxo, erx_stat);
649                 }
650         }
651 }
652
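/* Aggregate the per-queue SW counters (read under u64_stats sync) and
 * the FW-reported error counters into the rtnl_link_stats64 struct for
 * ndo_get_stats64.
 */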
653 static void be_get_stats64(struct net_device *netdev,
654                            struct rtnl_link_stats64 *stats)
655 {
656         struct be_adapter *adapter = netdev_priv(netdev);
657         struct be_drv_stats *drvs = &adapter->drv_stats;
658         struct be_rx_obj *rxo;
659         struct be_tx_obj *txo;
660         u64 pkts, bytes;
661         unsigned int start;
662         int i;
663
664         for_all_rx_queues(adapter, rxo, i) {
665                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
666
667                 do {
668                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
669                         pkts = rx_stats(rxo)->rx_pkts;
670                         bytes = rx_stats(rxo)->rx_bytes;
671                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
672                 stats->rx_packets += pkts;
673                 stats->rx_bytes += bytes;
674                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
675                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
676                                         rx_stats(rxo)->rx_drops_no_frags;
677         }
678
679         for_all_tx_queues(adapter, txo, i) {
680                 const struct be_tx_stats *tx_stats = tx_stats(txo);
681
682                 do {
683                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
684                         pkts = tx_stats(txo)->tx_pkts;
685                         bytes = tx_stats(txo)->tx_bytes;
686                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
687                 stats->tx_packets += pkts;
688                 stats->tx_bytes += bytes;
689         }
690
691         /* bad pkts received */
692         stats->rx_errors = drvs->rx_crc_errors +
693                 drvs->rx_alignment_symbol_errors +
694                 drvs->rx_in_range_errors +
695                 drvs->rx_out_range_errors +
696                 drvs->rx_frame_too_long +
697                 drvs->rx_dropped_too_small +
698                 drvs->rx_dropped_too_short +
699                 drvs->rx_dropped_header_too_small +
700                 drvs->rx_dropped_tcp_length +
701                 drvs->rx_dropped_runt;
702
703         /* detailed rx errors */
704         stats->rx_length_errors = drvs->rx_in_range_errors +
705                 drvs->rx_out_range_errors +
706                 drvs->rx_frame_too_long;
707
708         stats->rx_crc_errors = drvs->rx_crc_errors;
709
710         /* frame alignment errors */
711         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
712
713         /* receiver fifo overrun */
714         /* drops_no_pbuf is not per i/f, it's per BE card */
715         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
716                                 drvs->rx_input_fifo_overflow_drop +
717                                 drvs->rx_drops_no_pbuf;
718 }
719
720 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
721 {
722         struct net_device *netdev = adapter->netdev;
723
724         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
725                 netif_carrier_off(netdev);
726                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
727         }
728
729         if (link_status)
730                 netif_carrier_on(netdev);
731         else
732                 netif_carrier_off(netdev);
733
734         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
735 }
736
737 static int be_gso_hdr_len(struct sk_buff *skb)
738 {
739         if (skb->encapsulation)
740                 return skb_inner_transport_offset(skb) +
741                        inner_tcp_hdrlen(skb);
742         return skb_transport_offset(skb) + tcp_hdrlen(skb);
743 }
744
745 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
746 {
747         struct be_tx_stats *stats = tx_stats(txo);
748         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
749         /* Account for headers which get duplicated in TSO pkt */
750         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
751
752         u64_stats_update_begin(&stats->sync);
753         stats->tx_reqs++;
754         stats->tx_bytes += skb->len + dup_hdr_len;
755         stats->tx_pkts += tx_pkts;
756         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
757                 stats->tx_vxlan_offload_pkts += tx_pkts;
758         u64_stats_update_end(&stats->sync);
759 }
760
761 /* Returns number of WRBs needed for the skb */
762 static u32 skb_wrb_cnt(struct sk_buff *skb)
763 {
764         /* +1 for the header wrb */
765         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
766 }
767
768 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
769 {
770         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
771         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
772         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
773         wrb->rsvd0 = 0;
774 }
775
776 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
777  * to avoid the swap and shift/mask operations in wrb_fill().
778  */
779 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
780 {
781         wrb->frag_pa_hi = 0;
782         wrb->frag_pa_lo = 0;
783         wrb->frag_len = 0;
784         wrb->rsvd0 = 0;
785 }
786
787 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
788                                      struct sk_buff *skb)
789 {
790         u8 vlan_prio;
791         u16 vlan_tag;
792
793         vlan_tag = skb_vlan_tag_get(skb);
794         vlan_prio = skb_vlan_tag_get_prio(skb);
795         /* If the vlan priority provided by the OS is NOT in the available bmap */
796         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
797                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
798                                 adapter->recommended_prio_bits;
799
800         return vlan_tag;
801 }
802
803 /* Used only for IP tunnel packets */
804 static u16 skb_inner_ip_proto(struct sk_buff *skb)
805 {
806         return (inner_ip_hdr(skb)->version == 4) ?
807                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
808 }
809
810 static u16 skb_ip_proto(struct sk_buff *skb)
811 {
812         return (ip_hdr(skb)->version == 4) ?
813                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
814 }
815
816 static inline bool be_is_txq_full(struct be_tx_obj *txo)
817 {
818         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
819 }
820
821 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
822 {
823         return atomic_read(&txo->q.used) < txo->q.len / 2;
824 }
825
826 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
827 {
828         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
829 }
830
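/* Derive the per-packet WRB features (LSO/LSO6, IP/TCP/UDP checksum
 * offload, VLAN tag) from the skb.
 */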
831 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
832                                        struct sk_buff *skb,
833                                        struct be_wrb_params *wrb_params)
834 {
835         u16 proto;
836
837         if (skb_is_gso(skb)) {
838                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
839                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
840                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
841                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
842         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
843                 if (skb->encapsulation) {
844                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
845                         proto = skb_inner_ip_proto(skb);
846                 } else {
847                         proto = skb_ip_proto(skb);
848                 }
849                 if (proto == IPPROTO_TCP)
850                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
851                 else if (proto == IPPROTO_UDP)
852                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
853         }
854
855         if (skb_vlan_tag_present(skb)) {
856                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
857                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
858         }
859
860         BE_WRB_F_SET(wrb_params->features, CRC, 1);
861 }
862
863 static void wrb_fill_hdr(struct be_adapter *adapter,
864                          struct be_eth_hdr_wrb *hdr,
865                          struct be_wrb_params *wrb_params,
866                          struct sk_buff *skb)
867 {
868         memset(hdr, 0, sizeof(*hdr));
869
870         SET_TX_WRB_HDR_BITS(crc, hdr,
871                             BE_WRB_F_GET(wrb_params->features, CRC));
872         SET_TX_WRB_HDR_BITS(ipcs, hdr,
873                             BE_WRB_F_GET(wrb_params->features, IPCS));
874         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
875                             BE_WRB_F_GET(wrb_params->features, TCPCS));
876         SET_TX_WRB_HDR_BITS(udpcs, hdr,
877                             BE_WRB_F_GET(wrb_params->features, UDPCS));
878
879         SET_TX_WRB_HDR_BITS(lso, hdr,
880                             BE_WRB_F_GET(wrb_params->features, LSO));
881         SET_TX_WRB_HDR_BITS(lso6, hdr,
882                             BE_WRB_F_GET(wrb_params->features, LSO6));
883         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
884
885         /* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When
886          * this hack is not needed, the evt bit is set while ringing the DB.
887          */
888         SET_TX_WRB_HDR_BITS(event, hdr,
889                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
890         SET_TX_WRB_HDR_BITS(vlan, hdr,
891                             BE_WRB_F_GET(wrb_params->features, VLAN));
892         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
893
894         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
895         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
896         SET_TX_WRB_HDR_BITS(mgmt, hdr,
897                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
898 }
899
900 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
901                           bool unmap_single)
902 {
903         dma_addr_t dma;
904         u32 frag_len = le32_to_cpu(wrb->frag_len);
905
906
907         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
908                 (u64)le32_to_cpu(wrb->frag_pa_lo);
909         if (frag_len) {
910                 if (unmap_single)
911                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
912                 else
913                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
914         }
915 }
916
917 /* Grab a WRB header for xmit */
918 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
919 {
920         u32 head = txo->q.head;
921
922         queue_head_inc(&txo->q);
923         return head;
924 }
925
926 /* Set up the WRB header for xmit */
927 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
928                                 struct be_tx_obj *txo,
929                                 struct be_wrb_params *wrb_params,
930                                 struct sk_buff *skb, u16 head)
931 {
932         u32 num_frags = skb_wrb_cnt(skb);
933         struct be_queue_info *txq = &txo->q;
934         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
935
936         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
937         be_dws_cpu_to_le(hdr, sizeof(*hdr));
938
939         BUG_ON(txo->sent_skb_list[head]);
940         txo->sent_skb_list[head] = skb;
941         txo->last_req_hdr = head;
942         atomic_add(num_frags, &txq->used);
943         txo->last_req_wrb_cnt = num_frags;
944         txo->pend_wrb_cnt += num_frags;
945 }
946
947 /* Setup a WRB fragment (buffer descriptor) for xmit */
948 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
949                                  int len)
950 {
951         struct be_eth_wrb *wrb;
952         struct be_queue_info *txq = &txo->q;
953
954         wrb = queue_head_node(txq);
955         wrb_fill(wrb, busaddr, len);
956         queue_head_inc(txq);
957 }
958
959 /* Bring the queue back to the state it was in before the be_xmit_enqueue()
960  * routine was invoked. The producer index is restored to the previous packet
961  * and the WRBs of the current packet are unmapped. Handles tx setup errors.
962  */
963 static void be_xmit_restore(struct be_adapter *adapter,
964                             struct be_tx_obj *txo, u32 head, bool map_single,
965                             u32 copied)
966 {
967         struct device *dev;
968         struct be_eth_wrb *wrb;
969         struct be_queue_info *txq = &txo->q;
970
971         dev = &adapter->pdev->dev;
972         txq->head = head;
973
974         /* skip the first wrb (hdr); it's not mapped */
975         queue_head_inc(txq);
976         while (copied) {
977                 wrb = queue_head_node(txq);
978                 unmap_tx_frag(dev, wrb, map_single);
979                 map_single = false;
980                 copied -= le32_to_cpu(wrb->frag_len);
981                 queue_head_inc(txq);
982         }
983
984         txq->head = head;
985 }
986
987 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
988  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
989  * of WRBs used up by the packet.
990  */
991 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
992                            struct sk_buff *skb,
993                            struct be_wrb_params *wrb_params)
994 {
995         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
996         struct device *dev = &adapter->pdev->dev;
997         bool map_single = false;
998         u32 head;
999         dma_addr_t busaddr;
1000         int len;
1001
1002         head = be_tx_get_wrb_hdr(txo);
1003
1004         if (skb->len > skb->data_len) {
1005                 len = skb_headlen(skb);
1006
1007                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1008                 if (dma_mapping_error(dev, busaddr))
1009                         goto dma_err;
1010                 map_single = true;
1011                 be_tx_setup_wrb_frag(txo, busaddr, len);
1012                 copied += len;
1013         }
1014
1015         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1016                 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1017                 len = skb_frag_size(frag);
1018
1019                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1020                 if (dma_mapping_error(dev, busaddr))
1021                         goto dma_err;
1022                 be_tx_setup_wrb_frag(txo, busaddr, len);
1023                 copied += len;
1024         }
1025
1026         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1027
1028         be_tx_stats_update(txo, skb);
1029         return wrb_cnt;
1030
1031 dma_err:
1032         adapter->drv_stats.dma_map_errors++;
1033         be_xmit_restore(adapter, txo, head, map_single, copied);
1034         return 0;
1035 }
1036
1037 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1038 {
1039         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1040 }
1041
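/* Insert the VLAN tag (and the outer qnq VLAN, if configured) directly
 * into the packet data, for cases where HW VLAN tagging must be skipped.
 */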
1042 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1043                                              struct sk_buff *skb,
1044                                              struct be_wrb_params
1045                                              *wrb_params)
1046 {
1047         bool insert_vlan = false;
1048         u16 vlan_tag = 0;
1049
1050         skb = skb_share_check(skb, GFP_ATOMIC);
1051         if (unlikely(!skb))
1052                 return skb;
1053
1054         if (skb_vlan_tag_present(skb)) {
1055                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1056                 insert_vlan = true;
1057         }
1058
1059         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1060                 if (!insert_vlan) {
1061                         vlan_tag = adapter->pvid;
1062                         insert_vlan = true;
1063                 }
1064                 /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W
1065                  * to skip VLAN insertion
1066                  */
1067                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1068         }
1069
1070         if (insert_vlan) {
1071                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1072                                                 vlan_tag);
1073                 if (unlikely(!skb))
1074                         return skb;
1075                 __vlan_hwaccel_clear_tag(skb);
1076         }
1077
1078         /* Insert the outer VLAN, if any */
1079         if (adapter->qnq_vid) {
1080                 vlan_tag = adapter->qnq_vid;
1081                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1082                                                 vlan_tag);
1083                 if (unlikely(!skb))
1084                         return skb;
1085                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086         }
1087
1088         return skb;
1089 }
1090
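/* Check whether the skb is an IPv6 packet carrying an extension header
 * of the kind that can stall TX on BE3 (see be_ipv6_tx_stall_chk()).
 */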
1091 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1092 {
1093         struct ethhdr *eh = (struct ethhdr *)skb->data;
1094         u16 offset = ETH_HLEN;
1095
1096         if (eh->h_proto == htons(ETH_P_IPV6)) {
1097                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1098
1099                 offset += sizeof(struct ipv6hdr);
1100                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1101                     ip6h->nexthdr != NEXTHDR_UDP) {
1102                         struct ipv6_opt_hdr *ehdr =
1103                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1104
1105                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1106                         if (ehdr->hdrlen == 0xff)
1107                                 return true;
1108                 }
1109         }
1110         return false;
1111 }
1112
1113 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1114 {
1115         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1116 }
1117
1118 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119 {
1120         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1121 }
1122
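/* Apply BEx/Lancer-specific TX workarounds: trim padded runt packets
 * and fall back to SW VLAN insertion where HW tagging is problematic.
 */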
1123 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1124                                                   struct sk_buff *skb,
1125                                                   struct be_wrb_params
1126                                                   *wrb_params)
1127 {
1128         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1129         unsigned int eth_hdr_len;
1130         struct iphdr *ip;
1131
1132         /* For padded packets, BE HW modifies the tot_len field in the IP
1133          * header incorrectly when the VLAN tag is inserted by HW.
1134          * For padded packets, Lancer computes an incorrect checksum.
1135          */
1136         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1137                                                 VLAN_ETH_HLEN : ETH_HLEN;
1138         if (skb->len <= 60 &&
1139             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1140             is_ipv4_pkt(skb)) {
1141                 ip = (struct iphdr *)ip_hdr(skb);
1142                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1143         }
1144
1145         /* If vlan tag is already inlined in the packet, skip HW VLAN
1146          * tagging in pvid-tagging mode
1147          */
1148         if (be_pvid_tagging_enabled(adapter) &&
1149             veh->h_vlan_proto == htons(ETH_P_8021Q))
1150                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
1152         /* HW has a bug wherein it will calculate the CSUM for VLAN
1153          * pkts even when CSUM offload is disabled.
1154          * Manually insert the VLAN in the pkt.
1155          */
1156         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157             skb_vlan_tag_present(skb)) {
1158                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159                 if (unlikely(!skb))
1160                         goto err;
1161         }
1162
1163         /* HW may lock up when HW VLAN tagging is requested on
1164          * certain ipv6 packets. Drop such pkts if the HW workaround to
1165          * skip HW tagging is not enabled by FW.
1166          */
1167         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168                      (adapter->pvid || adapter->qnq_vid) &&
1169                      !qnq_async_evt_rcvd(adapter)))
1170                 goto tx_drop;
1171
1172         /* Manual VLAN tag insertion to prevent an ASIC lockup when the
1173          * ASIC inserts a VLAN tag into certain ipv6 packets.
1174          * Insert the VLAN tags in the driver, and set the event,
1175          * completion and vlan bits accordingly
1176          * in the Tx WRB.
1177          */
1178         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179             be_vlan_tag_tx_chk(adapter, skb)) {
1180                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181                 if (unlikely(!skb))
1182                         goto err;
1183         }
1184
1185         return skb;
1186 tx_drop:
1187         dev_kfree_skb_any(skb);
1188 err:
1189         return NULL;
1190 }
1191
1192 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193                                            struct sk_buff *skb,
1194                                            struct be_wrb_params *wrb_params)
1195 {
1196         int err;
1197
1198         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1199          * packets that are 32 bytes or less may cause a transmit stall
1200          * on that port. The workaround is to pad such packets
1201          * (len <= 32 bytes) to a minimum length of 36 bytes.
1202          */
1203         if (skb->len <= 32) {
1204                 if (skb_put_padto(skb, 36))
1205                         return NULL;
1206         }
1207
1208         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210                 if (!skb)
1211                         return NULL;
1212         }
1213
1214         /* The stack can send us skbs with length greater than
1215          * what the HW can handle. Trim the extra bytes.
1216          */
1217         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219         WARN_ON(err);
1220
1221         return skb;
1222 }
1223
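/* Notify HW of all pending WRBs. The last request is made eventable so
 * a completion is generated, and on non-Lancer chips a dummy WRB is
 * added when the pending count is odd.
 */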
1224 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225 {
1226         struct be_queue_info *txq = &txo->q;
1227         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229         /* Mark the last request eventable if it hasn't been marked already */
1230         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
1233         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1234         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235                 wrb_fill_dummy(queue_head_node(txq));
1236                 queue_head_inc(txq);
1237                 atomic_inc(&txq->used);
1238                 txo->pend_wrb_cnt++;
1239                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240                                            TX_HDR_WRB_NUM_SHIFT);
1241                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242                                           TX_HDR_WRB_NUM_SHIFT);
1243         }
1244         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245         txo->pend_wrb_cnt = 0;
1246 }
1247
1248 /* OS2BMC related */
1249
1250 #define DHCP_CLIENT_PORT        68
1251 #define DHCP_SERVER_PORT        67
1252 #define NET_BIOS_PORT1          137
1253 #define NET_BIOS_PORT2          138
1254 #define DHCPV6_RAS_PORT         547
1255
1256 #define is_mc_allowed_on_bmc(adapter, eh)       \
1257         (!is_multicast_filt_enabled(adapter) && \
1258          is_multicast_ether_addr(eh->h_dest) && \
1259          !is_broadcast_ether_addr(eh->h_dest))
1260
1261 #define is_bc_allowed_on_bmc(adapter, eh)       \
1262         (!is_broadcast_filt_enabled(adapter) && \
1263          is_broadcast_ether_addr(eh->h_dest))
1264
1265 #define is_arp_allowed_on_bmc(adapter, skb)     \
1266         (is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1269
1270 #define is_arp_filt_enabled(adapter)    \
1271                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1272
1273 #define is_dhcp_client_filt_enabled(adapter)    \
1274                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1275
1276 #define is_dhcp_srvr_filt_enabled(adapter)      \
1277                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1278
1279 #define is_nbios_filt_enabled(adapter)  \
1280                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1281
1282 #define is_ipv6_na_filt_enabled(adapter)        \
1283                 (adapter->bmc_filt_mask &       \
1284                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1285
1286 #define is_ipv6_ra_filt_enabled(adapter)        \
1287                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1288
1289 #define is_ipv6_ras_filt_enabled(adapter)       \
1290                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1291
1292 #define is_broadcast_filt_enabled(adapter)      \
1293                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1294
1295 #define is_multicast_filt_enabled(adapter)      \
1296                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1297
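/* Decide whether a copy of this TX packet must also be sent to the BMC
 * (OS2BMC), based on the packet type and the BMC filter mask.
 */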
1298 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1299                                struct sk_buff **skb)
1300 {
1301         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1302         bool os2bmc = false;
1303
1304         if (!be_is_os2bmc_enabled(adapter))
1305                 goto done;
1306
1307         if (!is_multicast_ether_addr(eh->h_dest))
1308                 goto done;
1309
1310         if (is_mc_allowed_on_bmc(adapter, eh) ||
1311             is_bc_allowed_on_bmc(adapter, eh) ||
1312             is_arp_allowed_on_bmc(adapter, (*skb))) {
1313                 os2bmc = true;
1314                 goto done;
1315         }
1316
1317         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1318                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1319                 u8 nexthdr = hdr->nexthdr;
1320
1321                 if (nexthdr == IPPROTO_ICMPV6) {
1322                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1323
1324                         switch (icmp6->icmp6_type) {
1325                         case NDISC_ROUTER_ADVERTISEMENT:
1326                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1327                                 goto done;
1328                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1329                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1330                                 goto done;
1331                         default:
1332                                 break;
1333                         }
1334                 }
1335         }
1336
1337         if (is_udp_pkt((*skb))) {
1338                 struct udphdr *udp = udp_hdr((*skb));
1339
1340                 switch (ntohs(udp->dest)) {
1341                 case DHCP_CLIENT_PORT:
1342                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1343                         goto done;
1344                 case DHCP_SERVER_PORT:
1345                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1346                         goto done;
1347                 case NET_BIOS_PORT1:
1348                 case NET_BIOS_PORT2:
1349                         os2bmc = is_nbios_filt_enabled(adapter);
1350                         goto done;
1351                 case DHCPV6_RAS_PORT:
1352                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1353                         goto done;
1354                 default:
1355                         break;
1356                 }
1357         }
1358 done:
1359         /* For vlan packets that are destined to the BMC, the ASIC
1360          * expects the vlan tag to be inline in the packet.
1361          */
1362         if (os2bmc)
1363                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1364
1365         return os2bmc;
1366 }
1367
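/* Main transmit handler: applies HW workarounds, enqueues the WRBs,
 * optionally duplicates the packet to the BMC and rings the TX doorbell
 * when flushing.
 */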
1368 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1369 {
1370         struct be_adapter *adapter = netdev_priv(netdev);
1371         u16 q_idx = skb_get_queue_mapping(skb);
1372         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1373         struct be_wrb_params wrb_params = { 0 };
1374         bool flush = !netdev_xmit_more();
1375         u16 wrb_cnt;
1376
1377         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1378         if (unlikely(!skb))
1379                 goto drop;
1380
1381         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1382
1383         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384         if (unlikely(!wrb_cnt)) {
1385                 dev_kfree_skb_any(skb);
1386                 goto drop;
1387         }
1388
1389         /* if os2bmc is enabled and the pkt is destined to the bmc,
1390          * enqueue the pkt a 2nd time with the mgmt bit set.
1391          */
1392         if (be_send_pkt_to_bmc(adapter, &skb)) {
1393                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1394                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1395                 if (unlikely(!wrb_cnt))
1396                         goto drop;
1397                 else
1398                         skb_get(skb);
1399         }
1400
1401         if (be_is_txq_full(txo)) {
1402                 netif_stop_subqueue(netdev, q_idx);
1403                 tx_stats(txo)->tx_stops++;
1404         }
1405
1406         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1407                 be_xmit_flush(adapter, txo);
1408
1409         return NETDEV_TX_OK;
1410 drop:
1411         tx_stats(txo)->tx_drv_drops++;
1412         /* Flush the already enqueued tx requests */
1413         if (flush && txo->pend_wrb_cnt)
1414                 be_xmit_flush(adapter, txo);
1415
1416         return NETDEV_TX_OK;
1417 }
1418
1419 static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1420 {
1421         struct be_adapter *adapter = netdev_priv(netdev);
1422         struct device *dev = &adapter->pdev->dev;
1423         struct be_tx_obj *txo;
1424         struct sk_buff *skb;
1425         struct tcphdr *tcphdr;
1426         struct udphdr *udphdr;
1427         u32 *entry;
1428         int status;
1429         int i, j;
1430
1431         for_all_tx_queues(adapter, txo, i) {
1432                 dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1433                          i, txo->q.head, txo->q.tail,
1434                          atomic_read(&txo->q.used), txo->q.id);
1435
1436                 entry = txo->q.dma_mem.va;
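                     /* Each TX WRB is 16 bytes; dump every non-zero descriptor
                      * as four 32-bit words.
                      */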
1437                 for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1438                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1439                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1440                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1441                                          j, entry[j], entry[j + 1],
1442                                          entry[j + 2], entry[j + 3]);
1443                         }
1444                 }
1445
1446                 entry = txo->cq.dma_mem.va;
1447                 dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1448                          i, txo->cq.head, txo->cq.tail,
1449                          atomic_read(&txo->cq.used));
1450                 for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1451                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1452                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1453                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1454                                          j, entry[j], entry[j + 1],
1455                                          entry[j + 2], entry[j + 3]);
1456                         }
1457                 }
1458
1459                 for (j = 0; j < TX_Q_LEN; j++) {
1460                         if (txo->sent_skb_list[j]) {
1461                                 skb = txo->sent_skb_list[j];
1462                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1463                                         tcphdr = tcp_hdr(skb);
1464                                         dev_info(dev, "TCP source port %d\n",
1465                                                  ntohs(tcphdr->source));
1466                                         dev_info(dev, "TCP dest port %d\n",
1467                                                  ntohs(tcphdr->dest));
1468                                         dev_info(dev, "TCP sequence num %u\n",
1469                                                  ntohl(tcphdr->seq));
1470                                         dev_info(dev, "TCP ack_seq %u\n",
1471                                                  ntohl(tcphdr->ack_seq));
1472                                 } else if (ip_hdr(skb)->protocol ==
1473                                            IPPROTO_UDP) {
1474                                         udphdr = udp_hdr(skb);
1475                                         dev_info(dev, "UDP source port %d\n",
1476                                                  ntohs(udphdr->source));
1477                                         dev_info(dev, "UDP dest port %d\n",
1478                                                  ntohs(udphdr->dest));
1479                                 }
1480                                 dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1481                                          j, skb, skb->len, skb->protocol);
1482                         }
1483                 }
1484         }
1485
1486         if (lancer_chip(adapter)) {
1487                 dev_info(dev, "Initiating reset due to tx timeout\n");
1488                 dev_info(dev, "Resetting adapter\n");
1489                 status = lancer_physdev_ctrl(adapter,
1490                                              PHYSDEV_CONTROL_FW_RESET_MASK);
1491                 if (status)
1492                         dev_err(dev, "Reset failed .. Reboot server\n");
1493         }
1494 }
1495
1496 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1497 {
1498         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1499                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1500 }
1501
1502 static int be_set_vlan_promisc(struct be_adapter *adapter)
1503 {
1504         struct device *dev = &adapter->pdev->dev;
1505         int status;
1506
1507         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1508                 return 0;
1509
1510         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1511         if (!status) {
1512                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1513                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1514         } else {
1515                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1516         }
1517         return status;
1518 }
1519
1520 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1521 {
1522         struct device *dev = &adapter->pdev->dev;
1523         int status;
1524
1525         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1526         if (!status) {
1527                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1528                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1529         }
1530         return status;
1531 }
1532
1533 /*
1534  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1535  * If the user configures more, place BE in vlan promiscuous mode.
1536  */
1537 static int be_vid_config(struct be_adapter *adapter)
1538 {
1539         struct device *dev = &adapter->pdev->dev;
1540         u16 vids[BE_NUM_VLANS_SUPPORTED];
1541         u16 num = 0, i = 0;
1542         int status = 0;
1543
1544         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1545         if (adapter->netdev->flags & IFF_PROMISC)
1546                 return 0;
1547
1548         if (adapter->vlans_added > be_max_vlans(adapter))
1549                 return be_set_vlan_promisc(adapter);
1550
1551         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1552                 status = be_clear_vlan_promisc(adapter);
1553                 if (status)
1554                         return status;
1555         }
1556         /* Construct VLAN Table to give to HW */
1557         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1558                 vids[num++] = cpu_to_le16(i);
1559
1560         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1561         if (status) {
1562                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1563                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1564                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1565                     addl_status(status) ==
1566                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1567                         return be_set_vlan_promisc(adapter);
1568         }
1569         return status;
1570 }
1571
1572 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1573 {
1574         struct be_adapter *adapter = netdev_priv(netdev);
1575         int status = 0;
1576
1577         mutex_lock(&adapter->rx_filter_lock);
1578
1579         /* Packets with VID 0 are always received by Lancer by default */
1580         if (lancer_chip(adapter) && vid == 0)
1581                 goto done;
1582
1583         if (test_bit(vid, adapter->vids))
1584                 goto done;
1585
1586         set_bit(vid, adapter->vids);
1587         adapter->vlans_added++;
1588
1589         status = be_vid_config(adapter);
1590 done:
1591         mutex_unlock(&adapter->rx_filter_lock);
1592         return status;
1593 }
1594
1595 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1596 {
1597         struct be_adapter *adapter = netdev_priv(netdev);
1598         int status = 0;
1599
1600         mutex_lock(&adapter->rx_filter_lock);
1601
1602         /* Packets with VID 0 are always received by Lancer by default */
1603         if (lancer_chip(adapter) && vid == 0)
1604                 goto done;
1605
1606         if (!test_bit(vid, adapter->vids))
1607                 goto done;
1608
1609         clear_bit(vid, adapter->vids);
1610         adapter->vlans_added--;
1611
1612         status = be_vid_config(adapter);
1613 done:
1614         mutex_unlock(&adapter->rx_filter_lock);
1615         return status;
1616 }
1617
1618 static void be_set_all_promisc(struct be_adapter *adapter)
1619 {
1620         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1621         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1622 }
1623
1624 static void be_set_mc_promisc(struct be_adapter *adapter)
1625 {
1626         int status;
1627
1628         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1629                 return;
1630
1631         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1632         if (!status)
1633                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1634 }
1635
1636 static void be_set_uc_promisc(struct be_adapter *adapter)
1637 {
1638         int status;
1639
1640         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1641                 return;
1642
1643         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1644         if (!status)
1645                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1646 }
1647
1648 static void be_clear_uc_promisc(struct be_adapter *adapter)
1649 {
1650         int status;
1651
1652         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1653                 return;
1654
1655         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1656         if (!status)
1657                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1658 }
1659
1660 /* The two functions below are the sync/unsync callbacks passed to
1661  * __dev_mc_sync()/__dev_uc_sync(). A single callback handles both sync and
1662  * unsync; no addresses are actually added or removed here. It only records
1663  * that the uc/mc list changed; the full list is programmed in be_set_rx_mode().
1664  */
1665 static int be_uc_list_update(struct net_device *netdev,
1666                              const unsigned char *addr)
1667 {
1668         struct be_adapter *adapter = netdev_priv(netdev);
1669
1670         adapter->update_uc_list = true;
1671         return 0;
1672 }
1673
1674 static int be_mc_list_update(struct net_device *netdev,
1675                              const unsigned char *addr)
1676 {
1677         struct be_adapter *adapter = netdev_priv(netdev);
1678
1679         adapter->update_mc_list = true;
1680         return 0;
1681 }
1682
1683 static void be_set_mc_list(struct be_adapter *adapter)
1684 {
1685         struct net_device *netdev = adapter->netdev;
1686         struct netdev_hw_addr *ha;
1687         bool mc_promisc = false;
1688         int status;
1689
1690         netif_addr_lock_bh(netdev);
1691         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1692
1693         if (netdev->flags & IFF_PROMISC) {
1694                 adapter->update_mc_list = false;
1695         } else if (netdev->flags & IFF_ALLMULTI ||
1696                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1697                 /* Enable multicast promisc if num configured exceeds
1698                  * what we support
1699                  */
1700                 mc_promisc = true;
1701                 adapter->update_mc_list = false;
1702         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1703                 /* Update mc-list unconditionally if the iface was previously
1704                  * in mc-promisc mode and now is out of that mode.
1705                  */
1706                 adapter->update_mc_list = true;
1707         }
1708
1709         if (adapter->update_mc_list) {
1710                 int i = 0;
1711
1712                 /* cache the mc-list in adapter */
1713                 netdev_for_each_mc_addr(ha, netdev) {
1714                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1715                         i++;
1716                 }
1717                 adapter->mc_count = netdev_mc_count(netdev);
1718         }
1719         netif_addr_unlock_bh(netdev);
1720
1721         if (mc_promisc) {
1722                 be_set_mc_promisc(adapter);
1723         } else if (adapter->update_mc_list) {
1724                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1725                 if (!status)
1726                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1727                 else
1728                         be_set_mc_promisc(adapter);
1729
1730                 adapter->update_mc_list = false;
1731         }
1732 }
1733
1734 static void be_clear_mc_list(struct be_adapter *adapter)
1735 {
1736         struct net_device *netdev = adapter->netdev;
1737
1738         __dev_mc_unsync(netdev, NULL);
1739         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1740         adapter->mc_count = 0;
1741 }
1742
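     /* If the address being added is the interface's own MAC (dev_mac), reuse
      * the pmac_id already programmed for it rather than consuming another
      * MAC-table slot.
      */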
1743 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1744 {
1745         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1746                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1747                 return 0;
1748         }
1749
1750         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1751                                adapter->if_handle,
1752                                &adapter->pmac_id[uc_idx + 1], 0);
1753 }
1754
1755 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1756 {
1757         if (pmac_id == adapter->pmac_id[0])
1758                 return;
1759
1760         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1761 }
1762
1763 static void be_set_uc_list(struct be_adapter *adapter)
1764 {
1765         struct net_device *netdev = adapter->netdev;
1766         struct netdev_hw_addr *ha;
1767         bool uc_promisc = false;
1768         int curr_uc_macs = 0, i;
1769
1770         netif_addr_lock_bh(netdev);
1771         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1772
1773         if (netdev->flags & IFF_PROMISC) {
1774                 adapter->update_uc_list = false;
1775         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1776                 uc_promisc = true;
1777                 adapter->update_uc_list = false;
1778         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1779                 /* Update uc-list unconditionally if the iface was previously
1780                  * in uc-promisc mode and now is out of that mode.
1781                  */
1782                 adapter->update_uc_list = true;
1783         }
1784
1785         if (adapter->update_uc_list) {
1786                 /* cache the uc-list in adapter array */
1787                 i = 0;
1788                 netdev_for_each_uc_addr(ha, netdev) {
1789                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1790                         i++;
1791                 }
1792                 curr_uc_macs = netdev_uc_count(netdev);
1793         }
1794         netif_addr_unlock_bh(netdev);
1795
1796         if (uc_promisc) {
1797                 be_set_uc_promisc(adapter);
1798         } else if (adapter->update_uc_list) {
1799                 be_clear_uc_promisc(adapter);
1800
1801                 for (i = 0; i < adapter->uc_macs; i++)
1802                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1803
1804                 for (i = 0; i < curr_uc_macs; i++)
1805                         be_uc_mac_add(adapter, i);
1806                 adapter->uc_macs = curr_uc_macs;
1807                 adapter->update_uc_list = false;
1808         }
1809 }
1810
1811 static void be_clear_uc_list(struct be_adapter *adapter)
1812 {
1813         struct net_device *netdev = adapter->netdev;
1814         int i;
1815
1816         __dev_uc_unsync(netdev, NULL);
1817         for (i = 0; i < adapter->uc_macs; i++)
1818                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1819
1820         adapter->uc_macs = 0;
1821 }
1822
1823 static void __be_set_rx_mode(struct be_adapter *adapter)
1824 {
1825         struct net_device *netdev = adapter->netdev;
1826
1827         mutex_lock(&adapter->rx_filter_lock);
1828
1829         if (netdev->flags & IFF_PROMISC) {
1830                 if (!be_in_all_promisc(adapter))
1831                         be_set_all_promisc(adapter);
1832         } else if (be_in_all_promisc(adapter)) {
1833                 /* We need to re-program the vlan-list or clear
1834                  * vlan-promisc mode (if needed) when the interface
1835                  * comes out of promisc mode.
1836                  */
1837                 be_vid_config(adapter);
1838         }
1839
1840         be_set_uc_list(adapter);
1841         be_set_mc_list(adapter);
1842
1843         mutex_unlock(&adapter->rx_filter_lock);
1844 }
1845
1846 static void be_work_set_rx_mode(struct work_struct *work)
1847 {
1848         struct be_cmd_work *cmd_work =
1849                                 container_of(work, struct be_cmd_work, work);
1850
1851         __be_set_rx_mode(cmd_work->adapter);
1852         kfree(cmd_work);
1853 }
1854
1855 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1856 {
1857         struct be_adapter *adapter = netdev_priv(netdev);
1858         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1859         int status;
1860
1861         if (!sriov_enabled(adapter))
1862                 return -EPERM;
1863
1864         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1865                 return -EINVAL;
1866
1867         /* Proceed further only if the user-provided MAC is different
1868          * from the active MAC
1869          */
1870         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1871                 return 0;
1872
1873         if (BEx_chip(adapter)) {
1874                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1875                                 vf + 1);
1876
1877                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1878                                          &vf_cfg->pmac_id, vf + 1);
1879         } else {
1880                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1881                                         vf + 1);
1882         }
1883
1884         if (status) {
1885                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1886                         mac, vf, status);
1887                 return be_cmd_status(status);
1888         }
1889
1890         ether_addr_copy(vf_cfg->mac_addr, mac);
1891
1892         return 0;
1893 }
1894
1895 static int be_get_vf_config(struct net_device *netdev, int vf,
1896                             struct ifla_vf_info *vi)
1897 {
1898         struct be_adapter *adapter = netdev_priv(netdev);
1899         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900
1901         if (!sriov_enabled(adapter))
1902                 return -EPERM;
1903
1904         if (vf >= adapter->num_vfs)
1905                 return -EINVAL;
1906
1907         vi->vf = vf;
1908         vi->max_tx_rate = vf_cfg->tx_rate;
1909         vi->min_tx_rate = 0;
1910         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1911         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1912         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1913         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1914         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1915
1916         return 0;
1917 }
1918
1919 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1920 {
1921         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1922         u16 vids[BE_NUM_VLANS_SUPPORTED];
1923         int vf_if_id = vf_cfg->if_handle;
1924         int status;
1925
1926         /* Enable Transparent VLAN Tagging */
1927         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1928         if (status)
1929                 return status;
1930
1931         /* With TVT enabled, clear any pre-programmed VLAN filters on the VF */
1932         vids[0] = 0;
1933         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1934         if (!status)
1935                 dev_info(&adapter->pdev->dev,
1936                          "Cleared guest VLANs on VF%d", vf);
1937
1938         /* After TVT is enabled, disallow VFs to program VLAN filters */
1939         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1940                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1941                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1942                 if (!status)
1943                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1944         }
1945         return 0;
1946 }
1947
1948 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1949 {
1950         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1951         struct device *dev = &adapter->pdev->dev;
1952         int status;
1953
1954         /* Reset Transparent VLAN Tagging. */
1955         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1956                                        vf_cfg->if_handle, 0, 0);
1957         if (status)
1958                 return status;
1959
1960         /* Allow VFs to program VLAN filtering */
1961         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1962                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1963                                                   BE_PRIV_FILTMGMT, vf + 1);
1964                 if (!status) {
1965                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1966                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1967                 }
1968         }
1969
1970         dev_info(dev,
1971                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1972         return 0;
1973 }
1974
1975 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1976                           __be16 vlan_proto)
1977 {
1978         struct be_adapter *adapter = netdev_priv(netdev);
1979         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1980         int status;
1981
1982         if (!sriov_enabled(adapter))
1983                 return -EPERM;
1984
1985         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1986                 return -EINVAL;
1987
1988         if (vlan_proto != htons(ETH_P_8021Q))
1989                 return -EPROTONOSUPPORT;
1990
1991         if (vlan || qos) {
1992                 vlan |= qos << VLAN_PRIO_SHIFT;
1993                 status = be_set_vf_tvt(adapter, vf, vlan);
1994         } else {
1995                 status = be_clear_vf_tvt(adapter, vf);
1996         }
1997
1998         if (status) {
1999                 dev_err(&adapter->pdev->dev,
2000                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2001                         status);
2002                 return be_cmd_status(status);
2003         }
2004
2005         vf_cfg->vlan_tag = vlan;
2006         return 0;
2007 }
2008
2009 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2010                              int min_tx_rate, int max_tx_rate)
2011 {
2012         struct be_adapter *adapter = netdev_priv(netdev);
2013         struct device *dev = &adapter->pdev->dev;
2014         int percent_rate, status = 0;
2015         u16 link_speed = 0;
2016         u8 link_status;
2017
2018         if (!sriov_enabled(adapter))
2019                 return -EPERM;
2020
2021         if (vf >= adapter->num_vfs)
2022                 return -EINVAL;
2023
2024         if (min_tx_rate)
2025                 return -EINVAL;
2026
2027         if (!max_tx_rate)
2028                 goto config_qos;
2029
2030         status = be_cmd_link_status_query(adapter, &link_speed,
2031                                           &link_status, 0);
2032         if (status)
2033                 goto err;
2034
2035         if (!link_status) {
2036                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
2037                 status = -ENETDOWN;
2038                 goto err;
2039         }
2040
2041         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2042                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2043                         link_speed);
2044                 status = -EINVAL;
2045                 goto err;
2046         }
2047
2048         /* On Skyhawk the QoS setting must be specified as a % of the link speed */
2049         percent_rate = link_speed / 100;
2050         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2051                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2052                         percent_rate);
2053                 status = -EINVAL;
2054                 goto err;
2055         }
2056
2057 config_qos:
2058         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2059         if (status)
2060                 goto err;
2061
2062         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2063         return 0;
2064
2065 err:
2066         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2067                 max_tx_rate, vf);
2068         return be_cmd_status(status);
2069 }
2070
2071 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2072                                 int link_state)
2073 {
2074         struct be_adapter *adapter = netdev_priv(netdev);
2075         int status;
2076
2077         if (!sriov_enabled(adapter))
2078                 return -EPERM;
2079
2080         if (vf >= adapter->num_vfs)
2081                 return -EINVAL;
2082
2083         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2084         if (status) {
2085                 dev_err(&adapter->pdev->dev,
2086                         "Link state change on VF %d failed: %#x\n", vf, status);
2087                 return be_cmd_status(status);
2088         }
2089
2090         adapter->vf_cfg[vf].plink_tracking = link_state;
2091
2092         return 0;
2093 }
2094
2095 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2096 {
2097         struct be_adapter *adapter = netdev_priv(netdev);
2098         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2099         u8 spoofchk;
2100         int status;
2101
2102         if (!sriov_enabled(adapter))
2103                 return -EPERM;
2104
2105         if (vf >= adapter->num_vfs)
2106                 return -EINVAL;
2107
2108         if (BEx_chip(adapter))
2109                 return -EOPNOTSUPP;
2110
2111         if (enable == vf_cfg->spoofchk)
2112                 return 0;
2113
2114         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2115
2116         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2117                                        0, spoofchk);
2118         if (status) {
2119                 dev_err(&adapter->pdev->dev,
2120                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2121                 return be_cmd_status(status);
2122         }
2123
2124         vf_cfg->spoofchk = enable;
2125         return 0;
2126 }
2127
2128 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2129                           ulong now)
2130 {
2131         aic->rx_pkts_prev = rx_pkts;
2132         aic->tx_reqs_prev = tx_pkts;
2133         aic->jiffies = now;
2134 }
2135
2136 static int be_get_new_eqd(struct be_eq_obj *eqo)
2137 {
2138         struct be_adapter *adapter = eqo->adapter;
2139         int eqd, start;
2140         struct be_aic_obj *aic;
2141         struct be_rx_obj *rxo;
2142         struct be_tx_obj *txo;
2143         u64 rx_pkts = 0, tx_pkts = 0;
2144         ulong now;
2145         u32 pps, delta;
2146         int i;
2147
2148         aic = &adapter->aic_obj[eqo->idx];
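             /* With adaptive interrupt coalescing disabled, use the static
              * delay (et_eqd) configured via ethtool.
              */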
2149         if (!adapter->aic_enabled) {
2150                 if (aic->jiffies)
2151                         aic->jiffies = 0;
2152                 eqd = aic->et_eqd;
2153                 return eqd;
2154         }
2155
2156         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2157                 do {
2158                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2159                         rx_pkts += rxo->stats.rx_pkts;
2160                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2161         }
2162
2163         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2164                 do {
2165                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2166                         tx_pkts += txo->stats.tx_reqs;
2167                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2168         }
2169
2170         /* Skip if the counters wrapped around or this is the first calculation */
2171         now = jiffies;
2172         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2173             rx_pkts < aic->rx_pkts_prev ||
2174             tx_pkts < aic->tx_reqs_prev) {
2175                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2176                 return aic->prev_eqd;
2177         }
2178
2179         delta = jiffies_to_msecs(now - aic->jiffies);
2180         if (delta == 0)
2181                 return aic->prev_eqd;
2182
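             /* Derive the new delay from the aggregate RX+TX packet rate since
              * the last sample: roughly 4 usecs of delay per 15K pkts/sec
              * (e.g. ~150K pkts/sec yields eqd = 40), clamped to the EQ's
              * [min_eqd, max_eqd] range and forced to 0 for low rates.
              */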
2183         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2184                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2185         eqd = (pps / 15000) << 2;
2186
2187         if (eqd < 8)
2188                 eqd = 0;
2189         eqd = min_t(u32, eqd, aic->max_eqd);
2190         eqd = max_t(u32, eqd, aic->min_eqd);
2191
2192         be_aic_update(aic, rx_pkts, tx_pkts, now);
2193
2194         return eqd;
2195 }
2196
2197 /* For Skyhawk-R only */
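     /* Map the computed EQ delay to one of the discrete R2I delay encodings
      * that are programmed into the EQ doorbell when it is re-armed.
      */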
2198 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2199 {
2200         struct be_adapter *adapter = eqo->adapter;
2201         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2202         ulong now = jiffies;
2203         int eqd;
2204         u32 mult_enc;
2205
2206         if (!adapter->aic_enabled)
2207                 return 0;
2208
2209         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2210                 eqd = aic->prev_eqd;
2211         else
2212                 eqd = be_get_new_eqd(eqo);
2213
2214         if (eqd > 100)
2215                 mult_enc = R2I_DLY_ENC_1;
2216         else if (eqd > 60)
2217                 mult_enc = R2I_DLY_ENC_2;
2218         else if (eqd > 20)
2219                 mult_enc = R2I_DLY_ENC_3;
2220         else
2221                 mult_enc = R2I_DLY_ENC_0;
2222
2223         aic->prev_eqd = eqd;
2224
2225         return mult_enc;
2226 }
2227
2228 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2229 {
2230         struct be_set_eqd set_eqd[MAX_EVT_QS];
2231         struct be_aic_obj *aic;
2232         struct be_eq_obj *eqo;
2233         int i, num = 0, eqd;
2234
2235         for_all_evt_queues(adapter, eqo, i) {
2236                 aic = &adapter->aic_obj[eqo->idx];
2237                 eqd = be_get_new_eqd(eqo);
2238                 if (force_update || eqd != aic->prev_eqd) {
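                             /* Convert the delay in usecs to the multiplier
                              * value passed to be_cmd_modify_eqd(); e.g.
                              * eqd = 40 usecs gives a delay_multiplier of 26.
                              */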
2239                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2240                         set_eqd[num].eq_id = eqo->q.id;
2241                         aic->prev_eqd = eqd;
2242                         num++;
2243                 }
2244         }
2245
2246         if (num)
2247                 be_cmd_modify_eqd(adapter, set_eqd, num);
2248 }
2249
2250 static void be_rx_stats_update(struct be_rx_obj *rxo,
2251                                struct be_rx_compl_info *rxcp)
2252 {
2253         struct be_rx_stats *stats = rx_stats(rxo);
2254
2255         u64_stats_update_begin(&stats->sync);
2256         stats->rx_compl++;
2257         stats->rx_bytes += rxcp->pkt_size;
2258         stats->rx_pkts++;
2259         if (rxcp->tunneled)
2260                 stats->rx_vxlan_offload_pkts++;
2261         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2262                 stats->rx_mcast_pkts++;
2263         if (rxcp->err)
2264                 stats->rx_compl_err++;
2265         u64_stats_update_end(&stats->sync);
2266 }
2267
2268 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2269 {
2270         /* L4 checksum is not reliable for non-TCP/UDP packets.
2271          * Also ignore ipcksm for ipv6 pkts
2272          */
2273         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2274                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2275 }
2276
2277 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2278 {
2279         struct be_adapter *adapter = rxo->adapter;
2280         struct be_rx_page_info *rx_page_info;
2281         struct be_queue_info *rxq = &rxo->q;
2282         u32 frag_idx = rxq->tail;
2283
2284         rx_page_info = &rxo->page_info_tbl[frag_idx];
2285         BUG_ON(!rx_page_info->page);
2286
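             /* The DMA mapping spans a whole "big page" that was carved into
              * rx_frag_size chunks when posted; unmap it only when the last
              * fragment of that page is consumed, otherwise just sync this
              * fragment for the CPU.
              */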
2287         if (rx_page_info->last_frag) {
2288                 dma_unmap_page(&adapter->pdev->dev,
2289                                dma_unmap_addr(rx_page_info, bus),
2290                                adapter->big_page_size, DMA_FROM_DEVICE);
2291                 rx_page_info->last_frag = false;
2292         } else {
2293                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2294                                         dma_unmap_addr(rx_page_info, bus),
2295                                         rx_frag_size, DMA_FROM_DEVICE);
2296         }
2297
2298         queue_tail_inc(rxq);
2299         atomic_dec(&rxq->used);
2300         return rx_page_info;
2301 }
2302
2303 /* Throw away the data in the Rx completion */
2304 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2305                                 struct be_rx_compl_info *rxcp)
2306 {
2307         struct be_rx_page_info *page_info;
2308         u16 i, num_rcvd = rxcp->num_rcvd;
2309
2310         for (i = 0; i < num_rcvd; i++) {
2311                 page_info = get_rx_page_info(rxo);
2312                 put_page(page_info->page);
2313                 memset(page_info, 0, sizeof(*page_info));
2314         }
2315 }
2316
2317 /*
2318  * skb_fill_rx_data forms a complete skb for an ether frame
2319  * indicated by rxcp.
2320  */
2321 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2322                              struct be_rx_compl_info *rxcp)
2323 {
2324         struct be_rx_page_info *page_info;
2325         u16 i, j;
2326         u16 hdr_len, curr_frag_len, remaining;
2327         u8 *start;
2328
2329         page_info = get_rx_page_info(rxo);
2330         start = page_address(page_info->page) + page_info->page_offset;
2331         prefetch(start);
2332
2333         /* Copy data in the first descriptor of this completion */
2334         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2335
2336         skb->len = curr_frag_len;
2337         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2338                 memcpy(skb->data, start, curr_frag_len);
2339                 /* Complete packet has now been moved to data */
2340                 put_page(page_info->page);
2341                 skb->data_len = 0;
2342                 skb->tail += curr_frag_len;
2343         } else {
2344                 hdr_len = ETH_HLEN;
2345                 memcpy(skb->data, start, hdr_len);
2346                 skb_shinfo(skb)->nr_frags = 1;
2347                 skb_frag_set_page(skb, 0, page_info->page);
2348                 skb_frag_off_set(&skb_shinfo(skb)->frags[0],
2349                                  page_info->page_offset + hdr_len);
2350                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2351                                   curr_frag_len - hdr_len);
2352                 skb->data_len = curr_frag_len - hdr_len;
2353                 skb->truesize += rx_frag_size;
2354                 skb->tail += hdr_len;
2355         }
2356         page_info->page = NULL;
2357
2358         if (rxcp->pkt_size <= rx_frag_size) {
2359                 BUG_ON(rxcp->num_rcvd != 1);
2360                 return;
2361         }
2362
2363         /* More frags present for this completion */
2364         remaining = rxcp->pkt_size - curr_frag_len;
2365         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2366                 page_info = get_rx_page_info(rxo);
2367                 curr_frag_len = min(remaining, rx_frag_size);
2368
2369                 /* Coalesce all frags from the same physical page in one slot */
2370                 if (page_info->page_offset == 0) {
2371                         /* Fresh page */
2372                         j++;
2373                         skb_frag_set_page(skb, j, page_info->page);
2374                         skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2375                                          page_info->page_offset);
2376                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2377                         skb_shinfo(skb)->nr_frags++;
2378                 } else {
2379                         put_page(page_info->page);
2380                 }
2381
2382                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2383                 skb->len += curr_frag_len;
2384                 skb->data_len += curr_frag_len;
2385                 skb->truesize += rx_frag_size;
2386                 remaining -= curr_frag_len;
2387                 page_info->page = NULL;
2388         }
2389         BUG_ON(j > MAX_SKB_FRAGS);
2390 }
2391
2392 /* Process the RX completion indicated by rxcp when GRO is disabled */
2393 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2394                                 struct be_rx_compl_info *rxcp)
2395 {
2396         struct be_adapter *adapter = rxo->adapter;
2397         struct net_device *netdev = adapter->netdev;
2398         struct sk_buff *skb;
2399
2400         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2401         if (unlikely(!skb)) {
2402                 rx_stats(rxo)->rx_drops_no_skbs++;
2403                 be_rx_compl_discard(rxo, rxcp);
2404                 return;
2405         }
2406
2407         skb_fill_rx_data(rxo, skb, rxcp);
2408
2409         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2410                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2411         else
2412                 skb_checksum_none_assert(skb);
2413
2414         skb->protocol = eth_type_trans(skb, netdev);
2415         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2416         if (netdev->features & NETIF_F_RXHASH)
2417                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2418
2419         skb->csum_level = rxcp->tunneled;
2420         skb_mark_napi_id(skb, napi);
2421
2422         if (rxcp->vlanf)
2423                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2424
2425         netif_receive_skb(skb);
2426 }
2427
2428 /* Process the RX completion indicated by rxcp when GRO is enabled */
2429 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2430                                     struct napi_struct *napi,
2431                                     struct be_rx_compl_info *rxcp)
2432 {
2433         struct be_adapter *adapter = rxo->adapter;
2434         struct be_rx_page_info *page_info;
2435         struct sk_buff *skb = NULL;
2436         u16 remaining, curr_frag_len;
2437         u16 i, j;
2438
2439         skb = napi_get_frags(napi);
2440         if (!skb) {
2441                 be_rx_compl_discard(rxo, rxcp);
2442                 return;
2443         }
2444
2445         remaining = rxcp->pkt_size;
2446         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2447                 page_info = get_rx_page_info(rxo);
2448
2449                 curr_frag_len = min(remaining, rx_frag_size);
2450
2451                 /* Coalesce all frags from the same physical page in one slot */
2452                 if (i == 0 || page_info->page_offset == 0) {
2453                         /* First frag or Fresh page */
2454                         j++;
2455                         skb_frag_set_page(skb, j, page_info->page);
2456                         skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2457                                          page_info->page_offset);
2458                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2459                 } else {
2460                         put_page(page_info->page);
2461                 }
2462                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2463                 skb->truesize += rx_frag_size;
2464                 remaining -= curr_frag_len;
2465                 memset(page_info, 0, sizeof(*page_info));
2466         }
2467         BUG_ON(j > MAX_SKB_FRAGS);
2468
2469         skb_shinfo(skb)->nr_frags = j + 1;
2470         skb->len = rxcp->pkt_size;
2471         skb->data_len = rxcp->pkt_size;
2472         skb->ip_summed = CHECKSUM_UNNECESSARY;
2473         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2474         if (adapter->netdev->features & NETIF_F_RXHASH)
2475                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2476
2477         skb->csum_level = rxcp->tunneled;
2478
2479         if (rxcp->vlanf)
2480                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2481
2482         napi_gro_frags(napi);
2483 }
2484
2485 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2486                                  struct be_rx_compl_info *rxcp)
2487 {
2488         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2489         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2490         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2491         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2492         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2493         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2494         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2495         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2496         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2497         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2498         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2499         if (rxcp->vlanf) {
2500                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2501                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2502         }
2503         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2504         rxcp->tunneled =
2505                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2506 }
2507
2508 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2509                                  struct be_rx_compl_info *rxcp)
2510 {
2511         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2512         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2513         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2514         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2515         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2516         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2517         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2518         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2519         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2520         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2521         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2522         if (rxcp->vlanf) {
2523                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2524                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2525         }
2526         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2527         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2528 }
2529
2530 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2531 {
2532         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2533         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2534         struct be_adapter *adapter = rxo->adapter;
2535
2536         /* For checking the valid bit it is Ok to use either definition as the
2537          * valid bit is at the same position in both v0 and v1 Rx compl */
2538         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2539                 return NULL;
2540
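             /* Ensure load ordering of the valid bit dword and the other
              * dwords parsed below.
              */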
2541         rmb();
2542         be_dws_le_to_cpu(compl, sizeof(*compl));
2543
2544         if (adapter->be3_native)
2545                 be_parse_rx_compl_v1(compl, rxcp);
2546         else
2547                 be_parse_rx_compl_v0(compl, rxcp);
2548
2549         if (rxcp->ip_frag)
2550                 rxcp->l4_csum = 0;
2551
2552         if (rxcp->vlanf) {
2553                 /* In QNQ modes, if qnq bit is not set, then the packet was
2554                  * tagged only with the transparent outer vlan-tag and must
2555                  * not be treated as a vlan packet by host
2556                  */
2557                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2558                         rxcp->vlanf = 0;
2559
2560                 if (!lancer_chip(adapter))
2561                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2562
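                     /* If the tag is the port's default VLAN (pvid) and the
                      * host has not configured that VLAN, treat the packet as
                      * untagged.
                      */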
2563                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2564                     !test_bit(rxcp->vlan_tag, adapter->vids))
2565                         rxcp->vlanf = 0;
2566         }
2567
2568         /* As the compl has been parsed, reset it; we won't touch it again */
2569         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2570
2571         queue_tail_inc(&rxo->cq);
2572         return rxcp;
2573 }
2574
2575 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2576 {
2577         u32 order = get_order(size);
2578
2579         if (order > 0)
2580                 gfp |= __GFP_COMP;
2581         return  alloc_pages(gfp, order);
2582 }
2583
2584 /*
2585  * Allocate a page, split it into fragments of size rx_frag_size and post as
2586  * receive buffers to BE
2587  */
2588 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2589 {
2590         struct be_adapter *adapter = rxo->adapter;
2591         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2592         struct be_queue_info *rxq = &rxo->q;
2593         struct page *pagep = NULL;
2594         struct device *dev = &adapter->pdev->dev;
2595         struct be_eth_rx_d *rxd;
2596         u64 page_dmaaddr = 0, frag_dmaaddr;
2597         u32 posted, page_offset = 0, notify = 0;
2598
2599         page_info = &rxo->page_info_tbl[rxq->head];
2600         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2601                 if (!pagep) {
2602                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2603                         if (unlikely(!pagep)) {
2604                                 rx_stats(rxo)->rx_post_fail++;
2605                                 break;
2606                         }
2607                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2608                                                     adapter->big_page_size,
2609                                                     DMA_FROM_DEVICE);
2610                         if (dma_mapping_error(dev, page_dmaaddr)) {
2611                                 put_page(pagep);
2612                                 pagep = NULL;
2613                                 adapter->drv_stats.dma_map_errors++;
2614                                 break;
2615                         }
2616                         page_offset = 0;
2617                 } else {
2618                         get_page(pagep);
2619                         page_offset += rx_frag_size;
2620                 }
2621                 page_info->page_offset = page_offset;
2622                 page_info->page = pagep;
2623
2624                 rxd = queue_head_node(rxq);
2625                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2626                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2627                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2628
2629                 /* Any space left in the current big page for another frag? */
2630                 if ((page_offset + rx_frag_size + rx_frag_size) >
2631                                         adapter->big_page_size) {
2632                         pagep = NULL;
2633                         page_info->last_frag = true;
2634                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2635                 } else {
2636                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2637                 }
2638
2639                 prev_page_info = page_info;
2640                 queue_head_inc(rxq);
2641                 page_info = &rxo->page_info_tbl[rxq->head];
2642         }
2643
2644         /* Mark the last frag of a page when we break out of the above loop
2645          * with no more slots available in the RXQ
2646          */
2647         if (pagep) {
2648                 prev_page_info->last_frag = true;
2649                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2650         }
2651
2652         if (posted) {
2653                 atomic_add(posted, &rxq->used);
2654                 if (rxo->rx_post_starved)
2655                         rxo->rx_post_starved = false;
2656                 do {
2657                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2658                         be_rxq_notify(adapter, rxq->id, notify);
2659                         posted -= notify;
2660                 } while (posted);
2661         } else if (atomic_read(&rxq->used) == 0) {
2662                 /* Let be_worker replenish when memory is available */
2663                 rxo->rx_post_starved = true;
2664         }
2665 }
2666
2667 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2668 {
2669         switch (status) {
2670         case BE_TX_COMP_HDR_PARSE_ERR:
2671                 tx_stats(txo)->tx_hdr_parse_err++;
2672                 break;
2673         case BE_TX_COMP_NDMA_ERR:
2674                 tx_stats(txo)->tx_dma_err++;
2675                 break;
2676         case BE_TX_COMP_ACL_ERR:
2677                 tx_stats(txo)->tx_spoof_check_err++;
2678                 break;
2679         }
2680 }
2681
2682 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2683 {
2684         switch (status) {
2685         case LANCER_TX_COMP_LSO_ERR:
2686                 tx_stats(txo)->tx_tso_err++;
2687                 break;
2688         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2689         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2690                 tx_stats(txo)->tx_spoof_check_err++;
2691                 break;
2692         case LANCER_TX_COMP_QINQ_ERR:
2693                 tx_stats(txo)->tx_qinq_err++;
2694                 break;
2695         case LANCER_TX_COMP_PARITY_ERR:
2696                 tx_stats(txo)->tx_internal_parity_err++;
2697                 break;
2698         case LANCER_TX_COMP_DMA_ERR:
2699                 tx_stats(txo)->tx_dma_err++;
2700                 break;
2701         case LANCER_TX_COMP_SGE_ERR:
2702                 tx_stats(txo)->tx_sge_err++;
2703                 break;
2704         }
2705 }
2706
2707 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2708                                                 struct be_tx_obj *txo)
2709 {
2710         struct be_queue_info *tx_cq = &txo->cq;
2711         struct be_tx_compl_info *txcp = &txo->txcp;
2712         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2713
2714         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2715                 return NULL;
2716
2717         /* Ensure load ordering of valid bit dword and other dwords below */
2718         rmb();
2719         be_dws_le_to_cpu(compl, sizeof(*compl));
2720
2721         txcp->status = GET_TX_COMPL_BITS(status, compl);
2722         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2723
2724         if (txcp->status) {
2725                 if (lancer_chip(adapter)) {
2726                         lancer_update_tx_err(txo, txcp->status);
2727                         /* Reset the adapter in case of TSO,
2728                          * SGE or parity error
2729                          */
2730                         if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2731                             txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2732                             txcp->status == LANCER_TX_COMP_SGE_ERR)
2733                                 be_set_error(adapter, BE_ERROR_TX);
2734                 } else {
2735                         be_update_tx_err(txo, txcp->status);
2736                 }
2737         }
2738
2739         if (be_check_error(adapter, BE_ERROR_TX))
2740                 return NULL;
2741
2742         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2743         queue_tail_inc(tx_cq);
2744         return txcp;
2745 }
2746
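     /* Unmap and free the WRBs (and skb) of the TX request that ends at
      * last_index; returns the number of WRBs reclaimed so the caller can
      * credit them back to the TXQ.
      */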
2747 static u16 be_tx_compl_process(struct be_adapter *adapter,
2748                                struct be_tx_obj *txo, u16 last_index)
2749 {
2750         struct sk_buff **sent_skbs = txo->sent_skb_list;
2751         struct be_queue_info *txq = &txo->q;
2752         struct sk_buff *skb = NULL;
2753         bool unmap_skb_hdr = false;
2754         struct be_eth_wrb *wrb;
2755         u16 num_wrbs = 0;
2756         u32 frag_index;
2757
2758         do {
2759                 if (sent_skbs[txq->tail]) {
2760                         /* Free skb from prev req */
2761                         if (skb)
2762                                 dev_consume_skb_any(skb);
2763                         skb = sent_skbs[txq->tail];
2764                         sent_skbs[txq->tail] = NULL;
2765                         queue_tail_inc(txq);  /* skip hdr wrb */
2766                         num_wrbs++;
2767                         unmap_skb_hdr = true;
2768                 }
2769                 wrb = queue_tail_node(txq);
2770                 frag_index = txq->tail;
2771                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2772                               (unmap_skb_hdr && skb_headlen(skb)));
2773                 unmap_skb_hdr = false;
2774                 queue_tail_inc(txq);
2775                 num_wrbs++;
2776         } while (frag_index != last_index);
2777         dev_consume_skb_any(skb);
2778
2779         return num_wrbs;
2780 }
2781
2782 /* Return the number of events in the event queue */
2783 static inline int events_get(struct be_eq_obj *eqo)
2784 {
2785         struct be_eq_entry *eqe;
2786         int num = 0;
2787
2788         do {
2789                 eqe = queue_tail_node(&eqo->q);
2790                 if (eqe->evt == 0)
2791                         break;
2792
2793                 rmb();
2794                 eqe->evt = 0;
2795                 num++;
2796                 queue_tail_inc(&eqo->q);
2797         } while (true);
2798
2799         return num;
2800 }
2801
2802 /* Leaves the EQ in disarmed state */
2803 static void be_eq_clean(struct be_eq_obj *eqo)
2804 {
2805         int num = events_get(eqo);
2806
2807         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2808 }
2809
2810 /* Free posted rx buffers that were not used */
2811 static void be_rxq_clean(struct be_rx_obj *rxo)
2812 {
2813         struct be_queue_info *rxq = &rxo->q;
2814         struct be_rx_page_info *page_info;
2815
2816         while (atomic_read(&rxq->used) > 0) {
2817                 page_info = get_rx_page_info(rxo);
2818                 put_page(page_info->page);
2819                 memset(page_info, 0, sizeof(*page_info));
2820         }
2821         BUG_ON(atomic_read(&rxq->used));
2822         rxq->tail = 0;
2823         rxq->head = 0;
2824 }
2825
2826 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2827 {
2828         struct be_queue_info *rx_cq = &rxo->cq;
2829         struct be_rx_compl_info *rxcp;
2830         struct be_adapter *adapter = rxo->adapter;
2831         int flush_wait = 0;
2832
2833         /* Consume pending rx completions.
2834          * Wait for the flush completion (identified by zero num_rcvd)
2835          * to arrive. Notify CQ even when there are no more CQ entries
2836          * for HW to flush partially coalesced CQ entries.
2837          * In Lancer, there is no need to wait for flush compl.
2838          */
2839         for (;;) {
2840                 rxcp = be_rx_compl_get(rxo);
2841                 if (!rxcp) {
2842                         if (lancer_chip(adapter))
2843                                 break;
2844
2845                         if (flush_wait++ > 50 ||
2846                             be_check_error(adapter,
2847                                            BE_ERROR_HW)) {
2848                                 dev_warn(&adapter->pdev->dev,
2849                                          "did not receive flush compl\n");
2850                                 break;
2851                         }
2852                         be_cq_notify(adapter, rx_cq->id, true, 0);
2853                         mdelay(1);
2854                 } else {
2855                         be_rx_compl_discard(rxo, rxcp);
2856                         be_cq_notify(adapter, rx_cq->id, false, 1);
2857                         if (rxcp->num_rcvd == 0)
2858                                 break;
2859                 }
2860         }
2861
2862         /* After cleanup, leave the CQ in unarmed state */
2863         be_cq_notify(adapter, rx_cq->id, false, 0);
2864 }
2865
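     /* Used on the close path: polls TX completions on all TX queues until
      * the HW has been silent for ~10ms (or a HW error is seen), then frees
      * any WRBs/skbs that were enqueued but never notified to the HW and
      * rewinds the TXQ indices for them.
      */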
2866 static void be_tx_compl_clean(struct be_adapter *adapter)
2867 {
2868         struct device *dev = &adapter->pdev->dev;
2869         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2870         struct be_tx_compl_info *txcp;
2871         struct be_queue_info *txq;
2872         u32 end_idx, notified_idx;
2873         struct be_tx_obj *txo;
2874         int i, pending_txqs;
2875
2876         /* Stop polling for compls when HW has been silent for 10ms */
2877         do {
2878                 pending_txqs = adapter->num_tx_qs;
2879
2880                 for_all_tx_queues(adapter, txo, i) {
2881                         cmpl = 0;
2882                         num_wrbs = 0;
2883                         txq = &txo->q;
2884                         while ((txcp = be_tx_compl_get(adapter, txo))) {
2885                                 num_wrbs +=
2886                                         be_tx_compl_process(adapter, txo,
2887                                                             txcp->end_index);
2888                                 cmpl++;
2889                         }
2890                         if (cmpl) {
2891                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2892                                 atomic_sub(num_wrbs, &txq->used);
2893                                 timeo = 0;
2894                         }
2895                         if (!be_is_tx_compl_pending(txo))
2896                                 pending_txqs--;
2897                 }
2898
2899                 if (pending_txqs == 0 || ++timeo > 10 ||
2900                     be_check_error(adapter, BE_ERROR_HW))
2901                         break;
2902
2903                 mdelay(1);
2904         } while (true);
2905
2906         /* Free enqueued TX that was never notified to HW */
2907         for_all_tx_queues(adapter, txo, i) {
2908                 txq = &txo->q;
2909
2910                 if (atomic_read(&txq->used)) {
2911                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2912                                  i, atomic_read(&txq->used));
2913                         notified_idx = txq->tail;
2914                         end_idx = txq->tail;
2915                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2916                                   txq->len);
2917                         /* Use the tx-compl process logic to handle requests
2918                          * that were not sent to the HW.
2919                          */
2920                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2921                         atomic_sub(num_wrbs, &txq->used);
2922                         BUG_ON(atomic_read(&txq->used));
2923                         txo->pend_wrb_cnt = 0;
2924                         /* Since hw was never notified of these requests,
2925                          * reset TXQ indices
2926                          */
2927                         txq->head = notified_idx;
2928                         txq->tail = notified_idx;
2929                 }
2930         }
2931 }
2932
2933 static void be_evt_queues_destroy(struct be_adapter *adapter)
2934 {
2935         struct be_eq_obj *eqo;
2936         int i;
2937
2938         for_all_evt_queues(adapter, eqo, i) {
2939                 if (eqo->q.created) {
2940                         be_eq_clean(eqo);
2941                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2942                         netif_napi_del(&eqo->napi);
2943                         free_cpumask_var(eqo->affinity_mask);
2944                 }
2945                 be_queue_free(adapter, &eqo->q);
2946         }
2947 }
2948
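     /* Creates enough EQs to service both the RX and TX IRQs. For each EQ
      * the queue memory is allocated, the EQ-create cmd is issued, a CPU
      * affinity mask is spread across the local NUMA node and a NAPI
      * instance is registered.
      */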
2949 static int be_evt_queues_create(struct be_adapter *adapter)
2950 {
2951         struct be_queue_info *eq;
2952         struct be_eq_obj *eqo;
2953         struct be_aic_obj *aic;
2954         int i, rc;
2955
2956         /* need enough EQs to service both RX and TX queues */
2957         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2958                                     max(adapter->cfg_num_rx_irqs,
2959                                         adapter->cfg_num_tx_irqs));
2960
2961         adapter->aic_enabled = true;
2962
2963         for_all_evt_queues(adapter, eqo, i) {
2964                 int numa_node = dev_to_node(&adapter->pdev->dev);
2965
2966                 aic = &adapter->aic_obj[i];
2967                 eqo->adapter = adapter;
2968                 eqo->idx = i;
2969                 aic->max_eqd = BE_MAX_EQD;
2970
2971                 eq = &eqo->q;
2972                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2973                                     sizeof(struct be_eq_entry));
2974                 if (rc)
2975                         return rc;
2976
2977                 rc = be_cmd_eq_create(adapter, eqo);
2978                 if (rc)
2979                         return rc;
2980
2981                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2982                         return -ENOMEM;
2983                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2984                                 eqo->affinity_mask);
2985                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2986                                BE_NAPI_WEIGHT);
2987         }
2988         return 0;
2989 }
2990
2991 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2992 {
2993         struct be_queue_info *q;
2994
2995         q = &adapter->mcc_obj.q;
2996         if (q->created)
2997                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2998         be_queue_free(adapter, q);
2999
3000         q = &adapter->mcc_obj.cq;
3001         if (q->created)
3002                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3003         be_queue_free(adapter, q);
3004 }
3005
3006 /* Must be called only after TX qs are created as MCC shares TX EQ */
3007 static int be_mcc_queues_create(struct be_adapter *adapter)
3008 {
3009         struct be_queue_info *q, *cq;
3010
3011         cq = &adapter->mcc_obj.cq;
3012         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3013                            sizeof(struct be_mcc_compl)))
3014                 goto err;
3015
3016         /* Use the default EQ for MCC completions */
3017         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3018                 goto mcc_cq_free;
3019
3020         q = &adapter->mcc_obj.q;
3021         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3022                 goto mcc_cq_destroy;
3023
3024         if (be_cmd_mccq_create(adapter, q, cq))
3025                 goto mcc_q_free;
3026
3027         return 0;
3028
3029 mcc_q_free:
3030         be_queue_free(adapter, q);
3031 mcc_cq_destroy:
3032         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3033 mcc_cq_free:
3034         be_queue_free(adapter, cq);
3035 err:
3036         return -1;
3037 }
3038
3039 static void be_tx_queues_destroy(struct be_adapter *adapter)
3040 {
3041         struct be_queue_info *q;
3042         struct be_tx_obj *txo;
3043         u8 i;
3044
3045         for_all_tx_queues(adapter, txo, i) {
3046                 q = &txo->q;
3047                 if (q->created)
3048                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3049                 be_queue_free(adapter, q);
3050
3051                 q = &txo->cq;
3052                 if (q->created)
3053                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3054                 be_queue_free(adapter, q);
3055         }
3056 }
3057
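     /* Creates the TX CQs and TX WRB queues. Each TX CQ is bound to an EQ
      * in round-robin fashion (TXQs share EQs when there are fewer EQs than
      * TXQs) and the netdev XPS map is set to that EQ's affinity mask.
      */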
3058 static int be_tx_qs_create(struct be_adapter *adapter)
3059 {
3060         struct be_queue_info *cq;
3061         struct be_tx_obj *txo;
3062         struct be_eq_obj *eqo;
3063         int status, i;
3064
3065         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3066
3067         for_all_tx_queues(adapter, txo, i) {
3068                 cq = &txo->cq;
3069                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3070                                         sizeof(struct be_eth_tx_compl));
3071                 if (status)
3072                         return status;
3073
3074                 u64_stats_init(&txo->stats.sync);
3075                 u64_stats_init(&txo->stats.sync_compl);
3076
3077                 /* If num_evt_qs is less than num_tx_qs, then more than
3078                  * one txq shares an eq
3079                  */
3080                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3081                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3082                 if (status)
3083                         return status;
3084
3085                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3086                                         sizeof(struct be_eth_wrb));
3087                 if (status)
3088                         return status;
3089
3090                 status = be_cmd_txq_create(adapter, txo);
3091                 if (status)
3092                         return status;
3093
3094                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3095                                     eqo->idx);
3096         }
3097
3098         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3099                  adapter->num_tx_qs);
3100         return 0;
3101 }
3102
3103 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3104 {
3105         struct be_queue_info *q;
3106         struct be_rx_obj *rxo;
3107         int i;
3108
3109         for_all_rx_queues(adapter, rxo, i) {
3110                 q = &rxo->cq;
3111                 if (q->created)
3112                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3113                 be_queue_free(adapter, q);
3114         }
3115 }
3116
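     /* Decides how many RSS rings and default RXQs to use based on the
      * EQs/IRQs available, then allocates one RX CQ per RXQ and binds the
      * CQs to the EQs in round-robin fashion.
      */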
3117 static int be_rx_cqs_create(struct be_adapter *adapter)
3118 {
3119         struct be_queue_info *eq, *cq;
3120         struct be_rx_obj *rxo;
3121         int rc, i;
3122
3123         adapter->num_rss_qs =
3124                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3125
3126         /* We'll use RSS only if at least 2 RSS rings are supported. */
3127         if (adapter->num_rss_qs < 2)
3128                 adapter->num_rss_qs = 0;
3129
3130         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3131
3132         /* When the interface is not capable of RSS rings (and there is no
3133          * need to create a default RXQ) we'll still need one RXQ
3134          */
3135         if (adapter->num_rx_qs == 0)
3136                 adapter->num_rx_qs = 1;
3137
3138         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3139         for_all_rx_queues(adapter, rxo, i) {
3140                 rxo->adapter = adapter;
3141                 cq = &rxo->cq;
3142                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3143                                     sizeof(struct be_eth_rx_compl));
3144                 if (rc)
3145                         return rc;
3146
3147                 u64_stats_init(&rxo->stats.sync);
3148                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3149                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3150                 if (rc)
3151                         return rc;
3152         }
3153
3154         dev_info(&adapter->pdev->dev,
3155                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3156         return 0;
3157 }
3158
3159 static irqreturn_t be_intx(int irq, void *dev)
3160 {
3161         struct be_eq_obj *eqo = dev;
3162         struct be_adapter *adapter = eqo->adapter;
3163         int num_evts = 0;
3164
3165         /* IRQ is not expected when NAPI is scheduled as the EQ
3166          * will not be armed.
3167          * But, this can happen on Lancer INTx where it takes
3168          * a while to de-assert INTx or in BE2 where occasionally
3169          * an interrupt may be raised even when EQ is unarmed.
3170          * If NAPI is already scheduled, then counting & notifying
3171          * events will orphan them.
3172          */
3173         if (napi_schedule_prep(&eqo->napi)) {
3174                 num_evts = events_get(eqo);
3175                 __napi_schedule(&eqo->napi);
3176                 if (num_evts)
3177                         eqo->spurious_intr = 0;
3178         }
3179         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3180
3181         /* Return IRQ_HANDLED only for the first spurious intr
3182          * after a valid intr to stop the kernel from branding
3183          * this irq as a bad one!
3184          */
3185         if (num_evts || eqo->spurious_intr++ == 0)
3186                 return IRQ_HANDLED;
3187         else
3188                 return IRQ_NONE;
3189 }
3190
3191 static irqreturn_t be_msix(int irq, void *dev)
3192 {
3193         struct be_eq_obj *eqo = dev;
3194
3195         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3196         napi_schedule(&eqo->napi);
3197         return IRQ_HANDLED;
3198 }
3199
3200 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3201 {
3202         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3203 }
3204
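     /* Polls up to 'budget' RX completions on this RXQ. Flush completions,
      * partial-DMA completions (Lancer B0) and pkts mis-directed by
      * imperfect promiscuous filtering are discarded; the rest are passed
      * up via GRO or the regular RX path. The RXQ is replenished with frags
      * when it runs low.
      */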
3205 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3206                          int budget)
3207 {
3208         struct be_adapter *adapter = rxo->adapter;
3209         struct be_queue_info *rx_cq = &rxo->cq;
3210         struct be_rx_compl_info *rxcp;
3211         u32 work_done;
3212         u32 frags_consumed = 0;
3213
3214         for (work_done = 0; work_done < budget; work_done++) {
3215                 rxcp = be_rx_compl_get(rxo);
3216                 if (!rxcp)
3217                         break;
3218
3219                 /* Is it a flush compl that has no data */
3220                 if (unlikely(rxcp->num_rcvd == 0))
3221                         goto loop_continue;
3222
3223                 /* Discard compl with partial DMA Lancer B0 */
3224                 if (unlikely(!rxcp->pkt_size)) {
3225                         be_rx_compl_discard(rxo, rxcp);
3226                         goto loop_continue;
3227                 }
3228
3229                 /* On BE drop pkts that arrive due to imperfect filtering in
3230                  * promiscuous mode on some SKUs
3231                  */
3232                 if (unlikely(rxcp->port != adapter->port_num &&
3233                              !lancer_chip(adapter))) {
3234                         be_rx_compl_discard(rxo, rxcp);
3235                         goto loop_continue;
3236                 }
3237
3238                 if (do_gro(rxcp))
3239                         be_rx_compl_process_gro(rxo, napi, rxcp);
3240                 else
3241                         be_rx_compl_process(rxo, napi, rxcp);
3242
3243 loop_continue:
3244                 frags_consumed += rxcp->num_rcvd;
3245                 be_rx_stats_update(rxo, rxcp);
3246         }
3247
3248         if (work_done) {
3249                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3250
3251                 /* When an rx-obj gets into post_starved state, just
3252                  * let be_worker do the posting.
3253                  */
3254                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3255                     !rxo->rx_post_starved)
3256                         be_post_rx_frags(rxo, GFP_ATOMIC,
3257                                          max_t(u32, MAX_RX_POST,
3258                                                frags_consumed));
3259         }
3260
3261         return work_done;
3262 }
3263
3264
3265 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3266                           int idx)
3267 {
3268         int num_wrbs = 0, work_done = 0;
3269         struct be_tx_compl_info *txcp;
3270
3271         while ((txcp = be_tx_compl_get(adapter, txo))) {
3272                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3273                 work_done++;
3274         }
3275
3276         if (work_done) {
3277                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3278                 atomic_sub(num_wrbs, &txo->q.used);
3279
3280                 /* As Tx wrbs have been freed up, wake up netdev queue
3281                  * if it was stopped due to lack of tx wrbs.  */
3282                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3283                     be_can_txq_wake(txo)) {
3284                         netif_wake_subqueue(adapter->netdev, idx);
3285                 }
3286
3287                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3288                 tx_stats(txo)->tx_compl += work_done;
3289                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3290         }
3291 }
3292
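     /* NAPI poll handler: reaps events on the EQ, processes TX and RX
      * completions of all queues mapped to this EQ (and MCC completions on
      * the MCC EQ), and re-arms the EQ only when the RX work done stays
      * below budget.
      */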
3293 int be_poll(struct napi_struct *napi, int budget)
3294 {
3295         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3296         struct be_adapter *adapter = eqo->adapter;
3297         int max_work = 0, work, i, num_evts;
3298         struct be_rx_obj *rxo;
3299         struct be_tx_obj *txo;
3300         u32 mult_enc = 0;
3301
3302         num_evts = events_get(eqo);
3303
3304         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3305                 be_process_tx(adapter, txo, i);
3306
3307         /* This loop will iterate twice for EQ0 in which
3308          * completions of the last RXQ (default one) are also processed.
3309          * For other EQs the loop iterates only once.
3310          */
3311         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3312                 work = be_process_rx(rxo, napi, budget);
3313                 max_work = max(work, max_work);
3314         }
3315
3316         if (is_mcc_eqo(eqo))
3317                 be_process_mcc(adapter);
3318
3319         if (max_work < budget) {
3320                 napi_complete_done(napi, max_work);
3321
3322                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3323                  * delay via a delay multiplier encoding value
3324                  */
3325                 if (skyhawk_chip(adapter))
3326                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3327
3328                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3329                              mult_enc);
3330         } else {
3331                 /* As we'll continue in polling mode, count and clear events */
3332                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3333         }
3334         return max_work;
3335 }
3336
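     /* Checks the adapter for unrecoverable errors: the SLIPORT status
      * registers on Lancer, or the UE status registers on other chips
      * (ignoring spurious UEs that some BE3 platforms can report), and
      * flags the adapter error state accordingly.
      */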
3337 void be_detect_error(struct be_adapter *adapter)
3338 {
3339         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3340         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3341         struct device *dev = &adapter->pdev->dev;
3342         u16 val;
3343         u32 i;
3344
3345         if (be_check_error(adapter, BE_ERROR_HW))
3346                 return;
3347
3348         if (lancer_chip(adapter)) {
3349                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3350                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3351                         be_set_error(adapter, BE_ERROR_UE);
3352                         sliport_err1 = ioread32(adapter->db +
3353                                                 SLIPORT_ERROR1_OFFSET);
3354                         sliport_err2 = ioread32(adapter->db +
3355                                                 SLIPORT_ERROR2_OFFSET);
3356                         /* Do not log error messages if it's a FW reset */
3357                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3358                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3359                                 dev_info(dev, "Reset is in progress\n");
3360                         } else {
3361                                 dev_err(dev, "Error detected in the card\n");
3362                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3363                                         sliport_status);
3364                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3365                                         sliport_err1);
3366                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3367                                         sliport_err2);
3368                         }
3369                 }
3370         } else {
3371                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3372                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3373                 ue_lo_mask = ioread32(adapter->pcicfg +
3374                                       PCICFG_UE_STATUS_LOW_MASK);
3375                 ue_hi_mask = ioread32(adapter->pcicfg +
3376                                       PCICFG_UE_STATUS_HI_MASK);
3377
3378                 ue_lo = (ue_lo & ~ue_lo_mask);
3379                 ue_hi = (ue_hi & ~ue_hi_mask);
3380
3381                 if (ue_lo || ue_hi) {
3382                         /* On certain platforms BE3 hardware can indicate
3383                          * spurious UEs. In case of a UE in the chip,
3384                          * the POST register correctly reports either a
3385                          * FAT_LOG_START state (FW is currently dumping
3386                          * FAT log data) or an ARMFW_UE state. Check for the
3387                          * above states to ascertain if the UE is valid or not.
3388                          */
3389                         if (BE3_chip(adapter)) {
3390                                 val = be_POST_stage_get(adapter);
3391                                 if ((val & POST_STAGE_FAT_LOG_START)
3392                                      != POST_STAGE_FAT_LOG_START &&
3393                                     (val & POST_STAGE_ARMFW_UE)
3394                                      != POST_STAGE_ARMFW_UE &&
3395                                     (val & POST_STAGE_RECOVERABLE_ERR)
3396                                      != POST_STAGE_RECOVERABLE_ERR)
3397                                         return;
3398                         }
3399
3400                         dev_err(dev, "Error detected in the adapter");
3401                         be_set_error(adapter, BE_ERROR_UE);
3402
3403                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3404                                 if (ue_lo & 1)
3405                                         dev_err(dev, "UE: %s bit set\n",
3406                                                 ue_status_low_desc[i]);
3407                         }
3408                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3409                                 if (ue_hi & 1)
3410                                         dev_err(dev, "UE: %s bit set\n",
3411                                                 ue_status_hi_desc[i]);
3412                         }
3413                 }
3414         }
3415 }
3416
3417 static void be_msix_disable(struct be_adapter *adapter)
3418 {
3419         if (msix_enabled(adapter)) {
3420                 pci_disable_msix(adapter->pdev);
3421                 adapter->num_msix_vec = 0;
3422                 adapter->num_msix_roce_vec = 0;
3423         }
3424 }
3425
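     /* Requests MSI-X vectors: enough for both NIC and RoCE EQs when RoCE
      * is supported, otherwise just the configured number of RX/TX IRQs.
      * A failure is fatal only for VFs, which cannot fall back to INTx.
      */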
3426 static int be_msix_enable(struct be_adapter *adapter)
3427 {
3428         unsigned int i, max_roce_eqs;
3429         struct device *dev = &adapter->pdev->dev;
3430         int num_vec;
3431
3432         /* If RoCE is supported, program the max number of vectors that
3433          * could be used for NIC and RoCE; otherwise, just program the
3434          * number we'll use initially.
3435          */
3436         if (be_roce_supported(adapter)) {
3437                 max_roce_eqs =
3438                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3439                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3440                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3441         } else {
3442                 num_vec = max(adapter->cfg_num_rx_irqs,
3443                               adapter->cfg_num_tx_irqs);
3444         }
3445
3446         for (i = 0; i < num_vec; i++)
3447                 adapter->msix_entries[i].entry = i;
3448
3449         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3450                                         MIN_MSIX_VECTORS, num_vec);
3451         if (num_vec < 0)
3452                 goto fail;
3453
3454         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3455                 adapter->num_msix_roce_vec = num_vec / 2;
3456                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3457                          adapter->num_msix_roce_vec);
3458         }
3459
3460         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3461
3462         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3463                  adapter->num_msix_vec);
3464         return 0;
3465
3466 fail:
3467         dev_warn(dev, "MSIx enable failed\n");
3468
3469         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3470         if (be_virtfn(adapter))
3471                 return num_vec;
3472         return 0;
3473 }
3474
3475 static inline int be_msix_vec_get(struct be_adapter *adapter,
3476                                   struct be_eq_obj *eqo)
3477 {
3478         return adapter->msix_entries[eqo->msix_idx].vector;
3479 }
3480
3481 static int be_msix_register(struct be_adapter *adapter)
3482 {
3483         struct net_device *netdev = adapter->netdev;
3484         struct be_eq_obj *eqo;
3485         int status, i, vec;
3486
3487         for_all_evt_queues(adapter, eqo, i) {
3488                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3489                 vec = be_msix_vec_get(adapter, eqo);
3490                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3491                 if (status)
3492                         goto err_msix;
3493
3494                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3495         }
3496
3497         return 0;
3498 err_msix:
3499         for (i--; i >= 0; i--) {
3500                 eqo = &adapter->eq_obj[i];
3501                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3502         }
3503         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3504                  status);
3505         be_msix_disable(adapter);
3506         return status;
3507 }
3508
3509 static int be_irq_register(struct be_adapter *adapter)
3510 {
3511         struct net_device *netdev = adapter->netdev;
3512         int status;
3513
3514         if (msix_enabled(adapter)) {
3515                 status = be_msix_register(adapter);
3516                 if (status == 0)
3517                         goto done;
3518                 /* INTx is not supported for VF */
3519                 if (be_virtfn(adapter))
3520                         return status;
3521         }
3522
3523         /* INTx: only the first EQ is used */
3524         netdev->irq = adapter->pdev->irq;
3525         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3526                              &adapter->eq_obj[0]);
3527         if (status) {
3528                 dev_err(&adapter->pdev->dev,
3529                         "INTx request IRQ failed - err %d\n", status);
3530                 return status;
3531         }
3532 done:
3533         adapter->isr_registered = true;
3534         return 0;
3535 }
3536
3537 static void be_irq_unregister(struct be_adapter *adapter)
3538 {
3539         struct net_device *netdev = adapter->netdev;
3540         struct be_eq_obj *eqo;
3541         int i, vec;
3542
3543         if (!adapter->isr_registered)
3544                 return;
3545
3546         /* INTx */
3547         if (!msix_enabled(adapter)) {
3548                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3549                 goto done;
3550         }
3551
3552         /* MSIx */
3553         for_all_evt_queues(adapter, eqo, i) {
3554                 vec = be_msix_vec_get(adapter, eqo);
3555                 irq_set_affinity_hint(vec, NULL);
3556                 free_irq(vec, eqo);
3557         }
3558
3559 done:
3560         adapter->isr_registered = false;
3561 }
3562
3563 static void be_rx_qs_destroy(struct be_adapter *adapter)
3564 {
3565         struct rss_info *rss = &adapter->rss_info;
3566         struct be_queue_info *q;
3567         struct be_rx_obj *rxo;
3568         int i;
3569
3570         for_all_rx_queues(adapter, rxo, i) {
3571                 q = &rxo->q;
3572                 if (q->created) {
3573                         /* If RXQs are destroyed while in an "out of buffer"
3574                          * state, there is a possibility of an HW stall on
3575                          * Lancer. So, post 64 buffers to each queue to relieve
3576                          * the "out of buffer" condition.
3577                          * Make sure there's space in the RXQ before posting.
3578                          */
3579                         if (lancer_chip(adapter)) {
3580                                 be_rx_cq_clean(rxo);
3581                                 if (atomic_read(&q->used) == 0)
3582                                         be_post_rx_frags(rxo, GFP_KERNEL,
3583                                                          MAX_RX_POST);
3584                         }
3585
3586                         be_cmd_rxq_destroy(adapter, q);
3587                         be_rx_cq_clean(rxo);
3588                         be_rxq_clean(rxo);
3589                 }
3590                 be_queue_free(adapter, q);
3591         }
3592
3593         if (rss->rss_flags) {
3594                 rss->rss_flags = RSS_ENABLE_NONE;
3595                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3596                                   128, rss->rss_hkey);
3597         }
3598 }
3599
3600 static void be_disable_if_filters(struct be_adapter *adapter)
3601 {
3602         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3603         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3604             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3605                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3606                 eth_zero_addr(adapter->dev_mac);
3607         }
3608
3609         be_clear_uc_list(adapter);
3610         be_clear_mc_list(adapter);
3611
3612         /* The IFACE flags are enabled in the open path and cleared
3613          * in the close path. When a VF gets detached from the host and
3614          * assigned to a VM the following happens:
3615          *      - VF's IFACE flags get cleared in the detach path
3616          *      - IFACE create is issued by the VF in the attach path
3617          * Due to a bug in the BE3/Skyhawk-R FW
3618          * (Lancer FW doesn't have the bug), the IFACE capability flags
3619          * specified along with the IFACE create cmd issued by a VF are not
3620          * honoured by FW.  As a consequence, if a *new* driver
3621          * (that enables/disables IFACE flags in open/close)
3622          * is loaded in the host and an *old* driver is used by a VM/VF,
3623          * the IFACE gets created *without* the needed flags.
3624          * To avoid this, disable RX-filter flags only for Lancer.
3625          */
3626         if (lancer_chip(adapter)) {
3627                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3628                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3629         }
3630 }
3631
3632 static int be_close(struct net_device *netdev)
3633 {
3634         struct be_adapter *adapter = netdev_priv(netdev);
3635         struct be_eq_obj *eqo;
3636         int i;
3637
3638         /* This protection is needed as be_close() may be called even when the
3639          * adapter is in a cleared state (after an EEH permanent failure)
3640          */
3641         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3642                 return 0;
3643
3644         /* Before attempting cleanup ensure all the pending cmds in the
3645          * config_wq have finished execution
3646          */
3647         flush_workqueue(be_wq);
3648
3649         be_disable_if_filters(adapter);
3650
3651         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3652                 for_all_evt_queues(adapter, eqo, i) {
3653                         napi_disable(&eqo->napi);
3654                 }
3655                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3656         }
3657
3658         be_async_mcc_disable(adapter);
3659
3660         /* Wait for all pending tx completions to arrive so that
3661          * all tx skbs are freed.
3662          */
3663         netif_tx_disable(netdev);
3664         be_tx_compl_clean(adapter);
3665
3666         be_rx_qs_destroy(adapter);
3667
3668         for_all_evt_queues(adapter, eqo, i) {
3669                 if (msix_enabled(adapter))
3670                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3671                 else
3672                         synchronize_irq(netdev->irq);
3673                 be_eq_clean(eqo);
3674         }
3675
3676         be_irq_unregister(adapter);
3677
3678         return 0;
3679 }
3680
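     /* Creates the default RXQ (if needed) and the RSS RXQs, programs the
      * RSS indirection table, flags and hash key when multiple RXQs exist,
      * and posts the initial receive buffers to every RXQ.
      */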
3681 static int be_rx_qs_create(struct be_adapter *adapter)
3682 {
3683         struct rss_info *rss = &adapter->rss_info;
3684         u8 rss_key[RSS_HASH_KEY_LEN];
3685         struct be_rx_obj *rxo;
3686         int rc, i, j;
3687
3688         for_all_rx_queues(adapter, rxo, i) {
3689                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3690                                     sizeof(struct be_eth_rx_d));
3691                 if (rc)
3692                         return rc;
3693         }
3694
3695         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3696                 rxo = default_rxo(adapter);
3697                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3698                                        rx_frag_size, adapter->if_handle,
3699                                        false, &rxo->rss_id);
3700                 if (rc)
3701                         return rc;
3702         }
3703
3704         for_all_rss_queues(adapter, rxo, i) {
3705                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3706                                        rx_frag_size, adapter->if_handle,
3707                                        true, &rxo->rss_id);
3708                 if (rc)
3709                         return rc;
3710         }
3711
3712         if (be_multi_rxq(adapter)) {
3713                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3714                         for_all_rss_queues(adapter, rxo, i) {
3715                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3716                                         break;
3717                                 rss->rsstable[j + i] = rxo->rss_id;
3718                                 rss->rss_queue[j + i] = i;
3719                         }
3720                 }
3721                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3722                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3723
3724                 if (!BEx_chip(adapter))
3725                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3726                                 RSS_ENABLE_UDP_IPV6;
3727
3728                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3729                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3730                                        RSS_INDIR_TABLE_LEN, rss_key);
3731                 if (rc) {
3732                         rss->rss_flags = RSS_ENABLE_NONE;
3733                         return rc;
3734                 }
3735
3736                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3737         } else {
3738                 /* Disable RSS, if only default RX Q is created */
3739                 rss->rss_flags = RSS_ENABLE_NONE;
3740         }
3741
3742
3743         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3744          * which is a queue empty condition
3745          */
3746         for_all_rx_queues(adapter, rxo, i)
3747                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3748
3749         return 0;
3750 }
3751
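     /* Re-applies the interface filters on the open path: the basic RX
      * filter flags, the device MAC (re-programming it when it differs from
      * the netdev address, e.g. on BE3 VFs), the VLAN list and the RX mode.
      */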
3752 static int be_enable_if_filters(struct be_adapter *adapter)
3753 {
3754         int status;
3755
3756         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3757         if (status)
3758                 return status;
3759
3760         /* Normally this condition is true as the ->dev_mac is zeroed.
3761          * But on BE3 VFs the initial MAC is pre-programmed by PF and
3762          * subsequent be_dev_mac_add() can fail (after fresh boot)
3763          */
3764         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3765                 int old_pmac_id = -1;
3766
3767                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3768                 if (!is_zero_ether_addr(adapter->dev_mac))
3769                         old_pmac_id = adapter->pmac_id[0];
3770
3771                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3772                 if (status)
3773                         return status;
3774
3775                 /* Delete the old programmed MAC as we successfully programmed
3776                  * a new MAC
3777                  */
3778                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3779                         be_dev_mac_del(adapter, old_pmac_id);
3780
3781                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3782         }
3783
3784         if (adapter->vlans_added)
3785                 be_vid_config(adapter);
3786
3787         __be_set_rx_mode(adapter);
3788
3789         return 0;
3790 }
3791
3792 static int be_open(struct net_device *netdev)
3793 {
3794         struct be_adapter *adapter = netdev_priv(netdev);
3795         struct be_eq_obj *eqo;
3796         struct be_rx_obj *rxo;
3797         struct be_tx_obj *txo;
3798         u8 link_status;
3799         int status, i;
3800
3801         status = be_rx_qs_create(adapter);
3802         if (status)
3803                 goto err;
3804
3805         status = be_enable_if_filters(adapter);
3806         if (status)
3807                 goto err;
3808
3809         status = be_irq_register(adapter);
3810         if (status)
3811                 goto err;
3812
3813         for_all_rx_queues(adapter, rxo, i)
3814                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3815
3816         for_all_tx_queues(adapter, txo, i)
3817                 be_cq_notify(adapter, txo->cq.id, true, 0);
3818
3819         be_async_mcc_enable(adapter);
3820
3821         for_all_evt_queues(adapter, eqo, i) {
3822                 napi_enable(&eqo->napi);
3823                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3824         }
3825         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3826
3827         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3828         if (!status)
3829                 be_link_status_update(adapter, link_status);
3830
3831         netif_tx_start_all_queues(netdev);
3832         if (skyhawk_chip(adapter))
3833                 udp_tunnel_get_rx_info(netdev);
3834
3835         return 0;
3836 err:
3837         be_close(adapter->netdev);
3838         return -EIO;
3839 }
3840
3841 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3842 {
3843         u32 addr;
3844
3845         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3846
3847         mac[5] = (u8)(addr & 0xFF);
3848         mac[4] = (u8)((addr >> 8) & 0xFF);
3849         mac[3] = (u8)((addr >> 16) & 0xFF);
3850         /* Use the OUI from the current MAC address */
3851         memcpy(mac, adapter->netdev->dev_addr, 3);
3852 }
3853
3854 /*
3855  * Generate a seed MAC address from the PF MAC Address using jhash.
3856  * MAC Address for VFs are assigned incrementally starting from the seed.
3857  * These addresses are programmed in the ASIC by the PF and the VF driver
3858  * queries for the MAC address during its probe.
3859  */
3860 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3861 {
3862         u32 vf;
3863         int status = 0;
3864         u8 mac[ETH_ALEN];
3865         struct be_vf_cfg *vf_cfg;
3866
3867         be_vf_eth_addr_generate(adapter, mac);
3868
3869         for_all_vfs(adapter, vf_cfg, vf) {
3870                 if (BEx_chip(adapter))
3871                         status = be_cmd_pmac_add(adapter, mac,
3872                                                  vf_cfg->if_handle,
3873                                                  &vf_cfg->pmac_id, vf + 1);
3874                 else
3875                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3876                                                 vf + 1);
3877
3878                 if (status)
3879                         dev_err(&adapter->pdev->dev,
3880                                 "Mac address assignment failed for VF %d\n",
3881                                 vf);
3882                 else
3883                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3884
3885                 mac[5] += 1;
3886         }
3887         return status;
3888 }
3889
3890 static int be_vfs_mac_query(struct be_adapter *adapter)
3891 {
3892         int status, vf;
3893         u8 mac[ETH_ALEN];
3894         struct be_vf_cfg *vf_cfg;
3895
3896         for_all_vfs(adapter, vf_cfg, vf) {
3897                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3898                                                mac, vf_cfg->if_handle,
3899                                                false, vf+1);
3900                 if (status)
3901                         return status;
3902                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3903         }
3904         return 0;
3905 }
3906
3907 static void be_vf_clear(struct be_adapter *adapter)
3908 {
3909         struct be_vf_cfg *vf_cfg;
3910         u32 vf;
3911
3912         if (pci_vfs_assigned(adapter->pdev)) {
3913                 dev_warn(&adapter->pdev->dev,
3914                          "VFs are assigned to VMs: not disabling VFs\n");
3915                 goto done;
3916         }
3917
3918         pci_disable_sriov(adapter->pdev);
3919
3920         for_all_vfs(adapter, vf_cfg, vf) {
3921                 if (BEx_chip(adapter))
3922                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3923                                         vf_cfg->pmac_id, vf + 1);
3924                 else
3925                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3926                                        vf + 1);
3927
3928                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3929         }
3930
3931         if (BE3_chip(adapter))
3932                 be_cmd_set_hsw_config(adapter, 0, 0,
3933                                       adapter->if_handle,
3934                                       PORT_FWD_TYPE_PASSTHRU, 0);
3935 done:
3936         kfree(adapter->vf_cfg);
3937         adapter->num_vfs = 0;
3938         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3939 }
3940
3941 static void be_clear_queues(struct be_adapter *adapter)
3942 {
3943         be_mcc_queues_destroy(adapter);
3944         be_rx_cqs_destroy(adapter);
3945         be_tx_queues_destroy(adapter);
3946         be_evt_queues_destroy(adapter);
3947 }
3948
3949 static void be_cancel_worker(struct be_adapter *adapter)
3950 {
3951         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3952                 cancel_delayed_work_sync(&adapter->work);
3953                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3954         }
3955 }
3956
3957 static void be_cancel_err_detection(struct be_adapter *adapter)
3958 {
3959         struct be_error_recovery *err_rec = &adapter->error_recovery;
3960
3961         if (!be_err_recovery_workq)
3962                 return;
3963
3964         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3965                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3966                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3967         }
3968 }
3969
3970 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3971 {
3972         struct net_device *netdev = adapter->netdev;
3973         struct device *dev = &adapter->pdev->dev;
3974         struct be_vxlan_port *vxlan_port;
3975         __be16 port;
3976         int status;
3977
3978         vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3979                                       struct be_vxlan_port, list);
3980         port = vxlan_port->port;
3981
3982         status = be_cmd_manage_iface(adapter, adapter->if_handle,
3983                                      OP_CONVERT_NORMAL_TO_TUNNEL);
3984         if (status) {
3985                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3986                 return status;
3987         }
3988         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3989
3990         status = be_cmd_set_vxlan_port(adapter, port);
3991         if (status) {
3992                 dev_warn(dev, "Failed to add VxLAN port\n");
3993                 return status;
3994         }
3995         adapter->vxlan_port = port;
3996
3997         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3998                                    NETIF_F_TSO | NETIF_F_TSO6 |
3999                                    NETIF_F_GSO_UDP_TUNNEL;
4000
4001         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4002                  be16_to_cpu(port));
4003         return 0;
4004 }
4005
4006 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4007 {
4008         struct net_device *netdev = adapter->netdev;
4009
4010         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4011                 be_cmd_manage_iface(adapter, adapter->if_handle,
4012                                     OP_CONVERT_TUNNEL_TO_NORMAL);
4013
4014         if (adapter->vxlan_port)
4015                 be_cmd_set_vxlan_port(adapter, 0);
4016
4017         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4018         adapter->vxlan_port = 0;
4019
4020         netdev->hw_enc_features = 0;
4021 }
4022
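     /* Computes the per-VF share of queue, MAC, VLAN, IFACE and MCCQ
      * resources used when reconfiguring FW for SR-IOV: queues are split
      * evenly between the PF and the VFs, and RSS is offered to a VF only
      * if it can get more than one queue pair and an RSS table.
      */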
4023 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4024                                 struct be_resources *vft_res)
4025 {
4026         struct be_resources res = adapter->pool_res;
4027         u32 vf_if_cap_flags = res.vf_if_cap_flags;
4028         struct be_resources res_mod = {0};
4029         u16 num_vf_qs = 1;
4030
4031         /* Distribute the queue resources among the PF and its VFs */
4032         if (num_vfs) {
4033                 /* Divide the rx queues evenly among the VFs and the PF, capped
4034                  * at VF-EQ-count. Any remainder queues belong to the PF.
4035                  */
4036                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4037                                 res.max_rss_qs / (num_vfs + 1));
4038
4039                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4040                  * RSS Tables per port. Provide RSS on VFs, only if number of
4041                  * VFs requested is less than its PF Pool's RSS Tables limit.
4042                  */
4043                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4044                         num_vf_qs = 1;
4045         }
4046
4047         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4048          * which are modifiable using SET_PROFILE_CONFIG cmd.
4049          */
4050         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4051                                   RESOURCE_MODIFIABLE, 0);
4052
4053         /* If RSS IFACE capability flags are modifiable for a VF, set the
4054          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4055          * more than 1 RSSQ is available for a VF.
4056          * Otherwise, provision only 1 queue pair for VF.
4057          */
4058         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4059                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4060                 if (num_vf_qs > 1) {
4061                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4062                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4063                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4064                 } else {
4065                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4066                                              BE_IF_FLAGS_DEFQ_RSS);
4067                 }
4068         } else {
4069                 num_vf_qs = 1;
4070         }
4071
4072         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4073                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4074                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4075         }
4076
4077         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4078         vft_res->max_rx_qs = num_vf_qs;
4079         vft_res->max_rss_qs = num_vf_qs;
4080         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4081         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4082
4083         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4084          * among the PF and its VFs, if the fields are changeable
4085          */
4086         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4087                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4088
4089         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4090                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4091
4092         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4093                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4094
4095         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4096                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4097 }
4098
4099 static void be_if_destroy(struct be_adapter *adapter)
4100 {
4101         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4102
4103         kfree(adapter->pmac_id);
4104         adapter->pmac_id = NULL;
4105
4106         kfree(adapter->mc_list);
4107         adapter->mc_list = NULL;
4108
4109         kfree(adapter->uc_list);
4110         adapter->uc_list = NULL;
4111 }
4112
4113 static int be_clear(struct be_adapter *adapter)
4114 {
4115         struct pci_dev *pdev = adapter->pdev;
4116         struct  be_resources vft_res = {0};
4117
4118         be_cancel_worker(adapter);
4119
4120         flush_workqueue(be_wq);
4121
4122         if (sriov_enabled(adapter))
4123                 be_vf_clear(adapter);
4124
4125         /* Re-configure FW to distribute resources evenly across max-supported
4126          * number of VFs, only when VFs are not already enabled.
4127          */
4128         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4129             !pci_vfs_assigned(pdev)) {
4130                 be_calculate_vf_res(adapter,
4131                                     pci_sriov_get_totalvfs(pdev),
4132                                     &vft_res);
4133                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4134                                         pci_sriov_get_totalvfs(pdev),
4135                                         &vft_res);
4136         }
4137
4138         be_disable_vxlan_offloads(adapter);
4139
4140         be_if_destroy(adapter);
4141
4142         be_clear_queues(adapter);
4143
4144         be_msix_disable(adapter);
4145         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4146         return 0;
4147 }
4148
4149 static int be_vfs_if_create(struct be_adapter *adapter)
4150 {
4151         struct be_resources res = {0};
4152         u32 cap_flags, en_flags, vf;
4153         struct be_vf_cfg *vf_cfg;
4154         int status;
4155
4156         /* If a FW profile exists, then cap_flags are updated */
4157         cap_flags = BE_VF_IF_EN_FLAGS;
4158
4159         for_all_vfs(adapter, vf_cfg, vf) {
4160                 if (!BE3_chip(adapter)) {
4161                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4162                                                            ACTIVE_PROFILE_TYPE,
4163                                                            RESOURCE_LIMITS,
4164                                                            vf + 1);
4165                         if (!status) {
4166                                 cap_flags = res.if_cap_flags;
4167                                 /* Prevent VFs from enabling VLAN promiscuous
4168                                  * mode
4169                                  */
4170                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4171                         }
4172                 }
4173
4174                 /* PF should enable IF flags during proxy if_create call */
4175                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4176                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4177                                           &vf_cfg->if_handle, vf + 1);
4178                 if (status)
4179                         return status;
4180         }
4181
4182         return 0;
4183 }
4184
4185 static int be_vf_setup_init(struct be_adapter *adapter)
4186 {
4187         struct be_vf_cfg *vf_cfg;
4188         int vf;
4189
4190         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4191                                   GFP_KERNEL);
4192         if (!adapter->vf_cfg)
4193                 return -ENOMEM;
4194
4195         for_all_vfs(adapter, vf_cfg, vf) {
4196                 vf_cfg->if_handle = -1;
4197                 vf_cfg->pmac_id = -1;
4198         }
4199         return 0;
4200 }
4201
4202 static int be_vf_setup(struct be_adapter *adapter)
4203 {
4204         struct device *dev = &adapter->pdev->dev;
4205         struct be_vf_cfg *vf_cfg;
4206         int status, old_vfs, vf;
4207         bool spoofchk;
4208
4209         old_vfs = pci_num_vf(adapter->pdev);
4210
4211         status = be_vf_setup_init(adapter);
4212         if (status)
4213                 goto err;
4214
4215         if (old_vfs) {
4216                 for_all_vfs(adapter, vf_cfg, vf) {
4217                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4218                         if (status)
4219                                 goto err;
4220                 }
4221
4222                 status = be_vfs_mac_query(adapter);
4223                 if (status)
4224                         goto err;
4225         } else {
4226                 status = be_vfs_if_create(adapter);
4227                 if (status)
4228                         goto err;
4229
4230                 status = be_vf_eth_addr_config(adapter);
4231                 if (status)
4232                         goto err;
4233         }
4234
4235         for_all_vfs(adapter, vf_cfg, vf) {
4236                 /* Allow VFs to program MAC/VLAN filters */
4237                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4238                                                   vf + 1);
4239                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4240                         status = be_cmd_set_fn_privileges(adapter,
4241                                                           vf_cfg->privileges |
4242                                                           BE_PRIV_FILTMGMT,
4243                                                           vf + 1);
4244                         if (!status) {
4245                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4246                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4247                                          vf);
4248                         }
4249                 }
4250
4251                 /* Allow full available bandwidth */
4252                 if (!old_vfs)
4253                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4254
4255                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4256                                                vf_cfg->if_handle, NULL,
4257                                                &spoofchk);
4258                 if (!status)
4259                         vf_cfg->spoofchk = spoofchk;
4260
4261                 if (!old_vfs) {
4262                         be_cmd_enable_vf(adapter, vf + 1);
4263                         be_cmd_set_logical_link_config(adapter,
4264                                                        IFLA_VF_LINK_STATE_AUTO,
4265                                                        vf+1);
4266                 }
4267         }
4268
4269         if (!old_vfs) {
4270                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4271                 if (status) {
4272                         dev_err(dev, "SRIOV enable failed\n");
4273                         adapter->num_vfs = 0;
4274                         goto err;
4275                 }
4276         }
4277
4278         if (BE3_chip(adapter)) {
4279                 /* On BE3, enable VEB only when SRIOV is enabled */
4280                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4281                                                adapter->if_handle,
4282                                                PORT_FWD_TYPE_VEB, 0);
4283                 if (status)
4284                         goto err;
4285         }
4286
4287         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4288         return 0;
4289 err:
4290         dev_err(dev, "VF setup failed\n");
4291         be_vf_clear(adapter);
4292         return status;
4293 }
4294
4295 /* Converting function_mode bits on BE3 to SH mc_type enums */
4296
4297 static u8 be_convert_mc_type(u32 function_mode)
4298 {
4299         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4300                 return vNIC1;
4301         else if (function_mode & QNQ_MODE)
4302                 return FLEX10;
4303         else if (function_mode & VNIC_MODE)
4304                 return vNIC2;
4305         else if (function_mode & UMC_ENABLED)
4306                 return UMC;
4307         else
4308                 return MC_NONE;
4309 }
4310
4311 /* On BE2/BE3, FW does not suggest the supported limits */
4312 static void BEx_get_resources(struct be_adapter *adapter,
4313                               struct be_resources *res)
4314 {
4315         bool use_sriov = adapter->num_vfs ? 1 : 0;
4316
4317         if (be_physfn(adapter))
4318                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4319         else
4320                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4321
4322         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4323
4324         if (be_is_mc(adapter)) {
4325                 /* Assuming that there are 4 channels per port,
4326                  * when multi-channel is enabled
4327                  */
4328                 if (be_is_qnq_mode(adapter))
4329                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4330                 else
4331                         /* In a non-qnq multichannel mode, the pvid
4332                          * takes up one vlan entry
4333                          */
4334                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4335         } else {
4336                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4337         }
4338
4339         res->max_mcast_mac = BE_MAX_MC;
4340
4341         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4342          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4343          *    *only* if it is RSS-capable.
4344          */
4345         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4346             be_virtfn(adapter) ||
4347             (be_is_mc(adapter) &&
4348              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4349                 res->max_tx_qs = 1;
4350         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4351                 struct be_resources super_nic_res = {0};
4352
4353                 /* On a SuperNIC profile, the driver needs to use the
4354                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4355                  */
4356                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4357                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4358                                           0);
4359                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4360                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4361         } else {
4362                 res->max_tx_qs = BE3_MAX_TX_QS;
4363         }
4364
4365         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4366             !use_sriov && be_physfn(adapter))
4367                 res->max_rss_qs = (adapter->be3_native) ?
4368                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4369         res->max_rx_qs = res->max_rss_qs + 1;
4370
4371         if (be_physfn(adapter))
4372                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4373                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4374         else
4375                 res->max_evt_qs = 1;
4376
4377         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4378         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4379         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4380                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4381 }
4382
4383 static void be_setup_init(struct be_adapter *adapter)
4384 {
4385         adapter->vlan_prio_bmap = 0xff;
4386         adapter->phy.link_speed = -1;
4387         adapter->if_handle = -1;
4388         adapter->be3_native = false;
4389         adapter->if_flags = 0;
4390         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4391         if (be_physfn(adapter))
4392                 adapter->cmd_privileges = MAX_PRIVILEGES;
4393         else
4394                 adapter->cmd_privileges = MIN_PRIVILEGES;
4395 }
4396
4397 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4398  * However, this HW limitation is not exposed to the host via any SLI cmd.
4399  * As a result, in the case of SRIOV and in particular multi-partition configs,
4400  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4401  * for distribution between the VFs. This self-imposed limit will determine the
4402  * number of VFs for which RSS can be enabled.
4403  */
4404 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4405 {
4406         struct be_port_resources port_res = {0};
4407         u8 rss_tables_on_port;
4408         u16 max_vfs = be_max_vfs(adapter);
4409
4410         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4411                                   RESOURCE_LIMITS, 0);
4412
4413         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4414
4415         /* Each PF Pool's RSS Tables limit =
4416          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4417          */
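        /* For example (illustrative numbers only): with 14 RSS Tables left on
         * the port after the NIC PFs take theirs, a PF-pool with 32 max VFs
         * out of 64 max VFs on the port gets 32 * 14 / 64 = 7 RSS Tables.
         */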
4418         adapter->pool_res.max_rss_tables =
4419                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4420 }
4421
4422 static int be_get_sriov_config(struct be_adapter *adapter)
4423 {
4424         struct be_resources res = {0};
4425         int max_vfs, old_vfs;
4426
4427         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4428                                   RESOURCE_LIMITS, 0);
4429
4430         /* Some old versions of BE3 FW don't report max_vfs value */
4431         if (BE3_chip(adapter) && !res.max_vfs) {
4432                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4433                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4434         }
4435
4436         adapter->pool_res = res;
4437
4438         /* If, during the previous unload of the driver, the VFs were not disabled,
4439          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4440          * Instead use the TotalVFs value stored in the pci-dev struct.
4441          */
4442         old_vfs = pci_num_vf(adapter->pdev);
4443         if (old_vfs) {
4444                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4445                          old_vfs);
4446
4447                 adapter->pool_res.max_vfs =
4448                         pci_sriov_get_totalvfs(adapter->pdev);
4449                 adapter->num_vfs = old_vfs;
4450         }
4451
4452         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4453                 be_calculate_pf_pool_rss_tables(adapter);
4454                 dev_info(&adapter->pdev->dev,
4455                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4456                          be_max_pf_pool_rss_tables(adapter));
4457         }
4458         return 0;
4459 }
4460
4461 static void be_alloc_sriov_res(struct be_adapter *adapter)
4462 {
4463         int old_vfs = pci_num_vf(adapter->pdev);
4464         struct  be_resources vft_res = {0};
4465         int status;
4466
4467         be_get_sriov_config(adapter);
4468
4469         if (!old_vfs)
4470                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4471
4472         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4473          * resources are given to the PF during driver load, if there are no
4474          * old VFs. This facility is not available in BE3 FW.
4475          * Also, this is done by the FW on the Lancer chip.
4476          */
4477         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4478                 be_calculate_vf_res(adapter, 0, &vft_res);
4479                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4480                                                  &vft_res);
4481                 if (status)
4482                         dev_err(&adapter->pdev->dev,
4483                                 "Failed to optimize SRIOV resources\n");
4484         }
4485 }
4486
4487 static int be_get_resources(struct be_adapter *adapter)
4488 {
4489         struct device *dev = &adapter->pdev->dev;
4490         struct be_resources res = {0};
4491         int status;
4492
4493         /* For Lancer, SH etc., read per-function resource limits from FW.
4494          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4495          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4496          */
4497         if (BEx_chip(adapter)) {
4498                 BEx_get_resources(adapter, &res);
4499         } else {
4500                 status = be_cmd_get_func_config(adapter, &res);
4501                 if (status)
4502                         return status;
4503
4504                 /* If a default RXQ must be created, we'll use up one RSSQ */
4505                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4506                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4507                         res.max_rss_qs -= 1;
4508         }
4509
4510         /* If RoCE is supported, stash away half the EQs for RoCE */
4511         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4512                                 res.max_evt_qs / 2 : res.max_evt_qs;
4513         adapter->res = res;
4514
4515         /* If FW supports RSS default queue, then skip creating non-RSS
4516          * queue for non-IP traffic.
4517          */
4518         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4519                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4520
4521         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4522                  be_max_txqs(adapter), be_max_rxqs(adapter),
4523                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4524                  be_max_vfs(adapter));
4525         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4526                  be_max_uc(adapter), be_max_mc(adapter),
4527                  be_max_vlans(adapter));
4528
4529         /* Ensure RX and TX queues are created in pairs at init time */
4530         adapter->cfg_num_rx_irqs =
4531                                 min_t(u16, netif_get_num_default_rss_queues(),
4532                                       be_max_qp_irqs(adapter));
4533         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4534         return 0;
4535 }
4536
4537 static int be_get_config(struct be_adapter *adapter)
4538 {
4539         int status, level;
4540         u16 profile_id;
4541
4542         status = be_cmd_get_cntl_attributes(adapter);
4543         if (status)
4544                 return status;
4545
4546         status = be_cmd_query_fw_cfg(adapter);
4547         if (status)
4548                 return status;
4549
4550         if (!lancer_chip(adapter) && be_physfn(adapter))
4551                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4552
4553         if (BEx_chip(adapter)) {
4554                 level = be_cmd_get_fw_log_level(adapter);
4555                 adapter->msg_enable =
4556                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4557         }
4558
4559         be_cmd_get_acpi_wol_cap(adapter);
4560         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4561         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4562
4563         be_cmd_query_port_name(adapter);
4564
4565         if (be_physfn(adapter)) {
4566                 status = be_cmd_get_active_profile(adapter, &profile_id);
4567                 if (!status)
4568                         dev_info(&adapter->pdev->dev,
4569                                  "Using profile 0x%x\n", profile_id);
4570         }
4571
4572         return 0;
4573 }
4574
4575 static int be_mac_setup(struct be_adapter *adapter)
4576 {
4577         u8 mac[ETH_ALEN];
4578         int status;
4579
4580         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4581                 status = be_cmd_get_perm_mac(adapter, mac);
4582                 if (status)
4583                         return status;
4584
4585                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4586                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4587
4588                 /* Initial MAC for BE3 VFs is already programmed by PF */
4589                 if (BEx_chip(adapter) && be_virtfn(adapter))
4590                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4591         }
4592
4593         return 0;
4594 }
4595
4596 static void be_schedule_worker(struct be_adapter *adapter)
4597 {
4598         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4599         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4600 }
4601
4602 static void be_destroy_err_recovery_workq(void)
4603 {
4604         if (!be_err_recovery_workq)
4605                 return;
4606
4607         flush_workqueue(be_err_recovery_workq);
4608         destroy_workqueue(be_err_recovery_workq);
4609         be_err_recovery_workq = NULL;
4610 }
4611
4612 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4613 {
4614         struct be_error_recovery *err_rec = &adapter->error_recovery;
4615
4616         if (!be_err_recovery_workq)
4617                 return;
4618
4619         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4620                            msecs_to_jiffies(delay));
4621         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4622 }
4623
4624 static int be_setup_queues(struct be_adapter *adapter)
4625 {
4626         struct net_device *netdev = adapter->netdev;
4627         int status;
4628
4629         status = be_evt_queues_create(adapter);
4630         if (status)
4631                 goto err;
4632
4633         status = be_tx_qs_create(adapter);
4634         if (status)
4635                 goto err;
4636
4637         status = be_rx_cqs_create(adapter);
4638         if (status)
4639                 goto err;
4640
4641         status = be_mcc_queues_create(adapter);
4642         if (status)
4643                 goto err;
4644
4645         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4646         if (status)
4647                 goto err;
4648
4649         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4650         if (status)
4651                 goto err;
4652
4653         return 0;
4654 err:
4655         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4656         return status;
4657 }
4658
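/* Create this function's interface (the RX filtering object) and allocate
 * the pmac/mc/uc tracking lists sized to the FW-reported limits. RSS flags
 * are dropped from the capabilities when only one RX IRQ is configured.
 */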
4659 static int be_if_create(struct be_adapter *adapter)
4660 {
4661         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4662         u32 cap_flags = be_if_cap_flags(adapter);
4663         int status;
4664
4665         /* alloc required memory for other filtering fields */
4666         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4667                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4668         if (!adapter->pmac_id)
4669                 return -ENOMEM;
4670
4671         adapter->mc_list = kcalloc(be_max_mc(adapter),
4672                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4673         if (!adapter->mc_list)
4674                 return -ENOMEM;
4675
4676         adapter->uc_list = kcalloc(be_max_uc(adapter),
4677                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4678         if (!adapter->uc_list)
4679                 return -ENOMEM;
4680
4681         if (adapter->cfg_num_rx_irqs == 1)
4682                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4683
4684         en_flags &= cap_flags;
4685         /* will enable all the needed filter flags in be_open() */
4686         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4687                                   &adapter->if_handle, 0);
4688
4689         if (status)
4690                 return status;
4691
4692         return 0;
4693 }
4694
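/* Destroy and re-create the interface and all queues using the currently
 * configured IRQ/queue counts. If the netdev is running it is quiesced
 * first and re-opened at the end.
 */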
4695 int be_update_queues(struct be_adapter *adapter)
4696 {
4697         struct net_device *netdev = adapter->netdev;
4698         int status;
4699
4700         if (netif_running(netdev)) {
4701                 /* be_tx_timeout() must not run concurrently with this
4702                  * function; synchronize with an already-running dev_watchdog
4703                  */
4704                 netif_tx_lock_bh(netdev);
4705                 /* device cannot transmit now, avoid dev_watchdog timeouts */
4706                 netif_carrier_off(netdev);
4707                 netif_tx_unlock_bh(netdev);
4708
4709                 be_close(netdev);
4710         }
4711
4712         be_cancel_worker(adapter);
4713
4714         /* If any vectors have been shared with RoCE we cannot re-program
4715          * the MSIx table.
4716          */
4717         if (!adapter->num_msix_roce_vec)
4718                 be_msix_disable(adapter);
4719
4720         be_clear_queues(adapter);
4721         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4722         if (status)
4723                 return status;
4724
4725         if (!msix_enabled(adapter)) {
4726                 status = be_msix_enable(adapter);
4727                 if (status)
4728                         return status;
4729         }
4730
4731         status = be_if_create(adapter);
4732         if (status)
4733                 return status;
4734
4735         status = be_setup_queues(adapter);
4736         if (status)
4737                 return status;
4738
4739         be_schedule_worker(adapter);
4740
4741         /* The IF was destroyed and re-created. We need to clear
4742          * all promiscuous flags valid for the destroyed IF.
4743          * Without this promisc mode is not restored during
4744          * be_open() because the driver thinks that it is
4745          * already enabled in HW.
4746          */
4747         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4748
4749         if (netif_running(netdev))
4750                 status = be_open(netdev);
4751
4752         return status;
4753 }
4754
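/* Return the leading (major) number of a FW version string, or 0 if the
 * string cannot be parsed.
 */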
4755 static inline int fw_major_num(const char *fw_ver)
4756 {
4757         int fw_major = 0, i;
4758
4759         i = sscanf(fw_ver, "%d.", &fw_major);
4760         if (i != 1)
4761                 return 0;
4762
4763         return fw_major;
4764 }
4765
4766 /* If it is error recovery, FLR the PF.
4767  * Else, if any VFs are already enabled, don't FLR the PF.
4768  */
4769 static bool be_reset_required(struct be_adapter *adapter)
4770 {
4771         if (be_error_recovering(adapter))
4772                 return true;
4773         else
4774                 return pci_num_vf(adapter->pdev) == 0;
4775 }
4776
4777 /* Wait for the FW to be ready and perform the required initialization */
4778 static int be_func_init(struct be_adapter *adapter)
4779 {
4780         int status;
4781
4782         status = be_fw_wait_ready(adapter);
4783         if (status)
4784                 return status;
4785
4786         /* FW is now ready; clear errors to allow cmds/doorbell */
4787         be_clear_error(adapter, BE_CLEAR_ALL);
4788
4789         if (be_reset_required(adapter)) {
4790                 status = be_cmd_reset_function(adapter);
4791                 if (status)
4792                         return status;
4793
4794                 /* Wait for interrupts to quiesce after an FLR */
4795                 msleep(100);
4796         }
4797
4798         /* Tell FW we're ready to fire cmds */
4799         status = be_cmd_fw_init(adapter);
4800         if (status)
4801                 return status;
4802
4803         /* Allow interrupts for other ULPs running on NIC function */
4804         be_intr_set(adapter, true);
4805
4806         return 0;
4807 }
4808
4809 static int be_setup(struct be_adapter *adapter)
4810 {
4811         struct device *dev = &adapter->pdev->dev;
4812         int status;
4813
4814         status = be_func_init(adapter);
4815         if (status)
4816                 return status;
4817
4818         be_setup_init(adapter);
4819
4820         if (!lancer_chip(adapter))
4821                 be_cmd_req_native_mode(adapter);
4822
4823         /* invoke this cmd first to get pf_num and vf_num, which are needed
4824          * for issuing profile-related cmds
4825          */
4826         if (!BEx_chip(adapter)) {
4827                 status = be_cmd_get_func_config(adapter, NULL);
4828                 if (status)
4829                         return status;
4830         }
4831
4832         status = be_get_config(adapter);
4833         if (status)
4834                 goto err;
4835
4836         if (!BE2_chip(adapter) && be_physfn(adapter))
4837                 be_alloc_sriov_res(adapter);
4838
4839         status = be_get_resources(adapter);
4840         if (status)
4841                 goto err;
4842
4843         status = be_msix_enable(adapter);
4844         if (status)
4845                 goto err;
4846
4847         /* will enable all the needed filter flags in be_open() */
4848         status = be_if_create(adapter);
4849         if (status)
4850                 goto err;
4851
4852         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4853         rtnl_lock();
4854         status = be_setup_queues(adapter);
4855         rtnl_unlock();
4856         if (status)
4857                 goto err;
4858
4859         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4860
4861         status = be_mac_setup(adapter);
4862         if (status)
4863                 goto err;
4864
4865         be_cmd_get_fw_ver(adapter);
4866         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4867
4868         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4869                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4870                         adapter->fw_ver);
4871                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4872         }
4873
4874         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4875                                          adapter->rx_fc);
4876         if (status)
4877                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4878                                         &adapter->rx_fc);
4879
4880         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4881                  adapter->tx_fc, adapter->rx_fc);
4882
4883         if (be_physfn(adapter))
4884                 be_cmd_set_logical_link_config(adapter,
4885                                                IFLA_VF_LINK_STATE_AUTO, 0);
4886
4887         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's
4888          * vport, confusing any Linux bridge or OVS it might be connected to.
4889          * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
4890          * when SRIOV is not enabled.
4891          */
4892         if (BE3_chip(adapter))
4893                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4894                                       PORT_FWD_TYPE_PASSTHRU, 0);
4895
4896         if (adapter->num_vfs)
4897                 be_vf_setup(adapter);
4898
4899         status = be_cmd_get_phy_info(adapter);
4900         if (!status && be_pause_supported(adapter))
4901                 adapter->phy.fc_autoneg = 1;
4902
4903         if (be_physfn(adapter) && !lancer_chip(adapter))
4904                 be_cmd_set_features(adapter);
4905
4906         be_schedule_worker(adapter);
4907         adapter->flags |= BE_FLAGS_SETUP_DONE;
4908         return 0;
4909 err:
4910         be_clear(adapter);
4911         return status;
4912 }
4913
4914 #ifdef CONFIG_NET_POLL_CONTROLLER
4915 static void be_netpoll(struct net_device *netdev)
4916 {
4917         struct be_adapter *adapter = netdev_priv(netdev);
4918         struct be_eq_obj *eqo;
4919         int i;
4920
4921         for_all_evt_queues(adapter, eqo, i) {
4922                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4923                 napi_schedule(&eqo->napi);
4924         }
4925 }
4926 #endif
4927
4928 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4929 {
4930         const struct firmware *fw;
4931         int status;
4932
4933         if (!netif_running(adapter->netdev)) {
4934                 dev_err(&adapter->pdev->dev,
4935                         "Firmware load not allowed (interface is down)\n");
4936                 return -ENETDOWN;
4937         }
4938
4939         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4940         if (status)
4941                 goto fw_exit;
4942
4943         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4944
4945         if (lancer_chip(adapter))
4946                 status = lancer_fw_download(adapter, fw);
4947         else
4948                 status = be_fw_download(adapter, fw);
4949
4950         if (!status)
4951                 be_cmd_get_fw_ver(adapter);
4952
4953 fw_exit:
4954         release_firmware(fw);
4955         return status;
4956 }
4957
4958 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4959                                  u16 flags, struct netlink_ext_ack *extack)
4960 {
4961         struct be_adapter *adapter = netdev_priv(dev);
4962         struct nlattr *attr, *br_spec;
4963         int rem;
4964         int status = 0;
4965         u16 mode = 0;
4966
4967         if (!sriov_enabled(adapter))
4968                 return -EOPNOTSUPP;
4969
4970         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4971         if (!br_spec)
4972                 return -EINVAL;
4973
4974         nla_for_each_nested(attr, br_spec, rem) {
4975                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4976                         continue;
4977
4978                 if (nla_len(attr) < sizeof(mode))
4979                         return -EINVAL;
4980
4981                 mode = nla_get_u16(attr);
4982                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4983                         return -EOPNOTSUPP;
4984
4985                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4986                         return -EINVAL;
4987
4988                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4989                                                adapter->if_handle,
4990                                                mode == BRIDGE_MODE_VEPA ?
4991                                                PORT_FWD_TYPE_VEPA :
4992                                                PORT_FWD_TYPE_VEB, 0);
4993                 if (status)
4994                         goto err;
4995
4996                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4997                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4998
4999                 return status;
5000         }
5001 err:
5002         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5003                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5004
5005         return status;
5006 }
5007
5008 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5009                                  struct net_device *dev, u32 filter_mask,
5010                                  int nlflags)
5011 {
5012         struct be_adapter *adapter = netdev_priv(dev);
5013         int status = 0;
5014         u8 hsw_mode;
5015
5016         /* BE and Lancer chips support VEB mode only */
5017         if (BEx_chip(adapter) || lancer_chip(adapter)) {
5018                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5019                 if (!pci_sriov_get_totalvfs(adapter->pdev))
5020                         return 0;
5021                 hsw_mode = PORT_FWD_TYPE_VEB;
5022         } else {
5023                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
5024                                                adapter->if_handle, &hsw_mode,
5025                                                NULL);
5026                 if (status)
5027                         return 0;
5028
5029                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5030                         return 0;
5031         }
5032
5033         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5034                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
5035                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5036                                        0, 0, nlflags, filter_mask, NULL);
5037 }
5038
5039 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5040                                          void (*func)(struct work_struct *))
5041 {
5042         struct be_cmd_work *work;
5043
5044         work = kzalloc(sizeof(*work), GFP_ATOMIC);
5045         if (!work) {
5046                 dev_err(&adapter->pdev->dev,
5047                         "be_work memory allocation failed\n");
5048                 return NULL;
5049         }
5050
5051         INIT_WORK(&work->work, func);
5052         work->adapter = adapter;
5053         return work;
5054 }
5055
5056 /* VxLAN offload Notes:
5057  *
5058  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5059  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5060  * is expected to work across all types of IP tunnels once exported. Skyhawk
5061  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5062  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5063  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5064  * those other tunnels are unexported on the fly through ndo_features_check().
5065  *
5066  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5067  * adds more than one port, disable offloads and re-enable them again when
5068  * there's only one port left. We maintain a list of ports for this purpose.
5069  */
5070 static void be_work_add_vxlan_port(struct work_struct *work)
5071 {
5072         struct be_cmd_work *cmd_work =
5073                                 container_of(work, struct be_cmd_work, work);
5074         struct be_adapter *adapter = cmd_work->adapter;
5075         struct device *dev = &adapter->pdev->dev;
5076         __be16 port = cmd_work->info.vxlan_port;
5077         struct be_vxlan_port *vxlan_port;
5078         int status;
5079
5080         /* Bump up the alias count if it is an existing port */
5081         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5082                 if (vxlan_port->port == port) {
5083                         vxlan_port->port_aliases++;
5084                         goto done;
5085                 }
5086         }
5087
5088         /* Add a new port to our list. We don't need a lock here since port
5089          * add/delete are done only in the context of a single-threaded work
5090          * queue (be_wq).
5091          */
5092         vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5093         if (!vxlan_port)
5094                 goto done;
5095
5096         vxlan_port->port = port;
5097         INIT_LIST_HEAD(&vxlan_port->list);
5098         list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5099         adapter->vxlan_port_count++;
5100
5101         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5102                 dev_info(dev,
5103                          "Only one UDP port supported for VxLAN offloads\n");
5104                 dev_info(dev, "Disabling VxLAN offloads\n");
5105                 goto err;
5106         }
5107
5108         if (adapter->vxlan_port_count > 1)
5109                 goto done;
5110
5111         status = be_enable_vxlan_offloads(adapter);
5112         if (!status)
5113                 goto done;
5114
5115 err:
5116         be_disable_vxlan_offloads(adapter);
5117 done:
5118         kfree(cmd_work);
5119         return;
5120 }
5121
5122 static void be_work_del_vxlan_port(struct work_struct *work)
5123 {
5124         struct be_cmd_work *cmd_work =
5125                                 container_of(work, struct be_cmd_work, work);
5126         struct be_adapter *adapter = cmd_work->adapter;
5127         __be16 port = cmd_work->info.vxlan_port;
5128         struct be_vxlan_port *vxlan_port;
5129
5130         /* Nothing to be done if a port alias is being deleted */
5131         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5132                 if (vxlan_port->port == port) {
5133                         if (vxlan_port->port_aliases) {
5134                                 vxlan_port->port_aliases--;
5135                                 goto done;
5136                         }
5137                         break;
5138                 }
5139         }
5140
5141         /* No port aliases left; delete the port from the list */
5142         list_del(&vxlan_port->list);
5143         adapter->vxlan_port_count--;
5144
5145         /* Disable VxLAN offload if this is the offloaded port */
5146         if (adapter->vxlan_port == vxlan_port->port) {
5147                 WARN_ON(adapter->vxlan_port_count);
5148                 be_disable_vxlan_offloads(adapter);
5149                 dev_info(&adapter->pdev->dev,
5150                          "Disabled VxLAN offloads for UDP port %d\n",
5151                          be16_to_cpu(port));
5152                 goto out;
5153         }
5154
5155         /* If only 1 port is left, re-enable VxLAN offload */
5156         if (adapter->vxlan_port_count == 1)
5157                 be_enable_vxlan_offloads(adapter);
5158
5159 out:
5160         kfree(vxlan_port);
5161 done:
5162         kfree(cmd_work);
5163 }
5164
5165 static void be_cfg_vxlan_port(struct net_device *netdev,
5166                               struct udp_tunnel_info *ti,
5167                               void (*func)(struct work_struct *))
5168 {
5169         struct be_adapter *adapter = netdev_priv(netdev);
5170         struct be_cmd_work *cmd_work;
5171
5172         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5173                 return;
5174
5175         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5176                 return;
5177
5178         cmd_work = be_alloc_work(adapter, func);
5179         if (cmd_work) {
5180                 cmd_work->info.vxlan_port = ti->port;
5181                 queue_work(be_wq, &cmd_work->work);
5182         }
5183 }
5184
5185 static void be_del_vxlan_port(struct net_device *netdev,
5186                               struct udp_tunnel_info *ti)
5187 {
5188         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5189 }
5190
5191 static void be_add_vxlan_port(struct net_device *netdev,
5192                               struct udp_tunnel_info *ti)
5193 {
5194         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5195 }
5196
5197 static netdev_features_t be_features_check(struct sk_buff *skb,
5198                                            struct net_device *dev,
5199                                            netdev_features_t features)
5200 {
5201         struct be_adapter *adapter = netdev_priv(dev);
5202         u8 l4_hdr = 0;
5203
5204         if (skb_is_gso(skb)) {
5205                 /* IPv6 TSO requests with extension hdrs are a problem
5206                  * to Lancer and BE3 HW. Disable TSO6 feature.
5207                  */
5208                 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5209                         features &= ~NETIF_F_TSO6;
5210
5211                 /* Lancer cannot handle a packet with an MSS less than 256.
5212                  * It also can't handle a TSO packet with a single segment.
5213                  * Disable GSO support in such cases.
5214                  */
5215                 if (lancer_chip(adapter) &&
5216                     (skb_shinfo(skb)->gso_size < 256 ||
5217                      skb_shinfo(skb)->gso_segs == 1))
5218                         features &= ~NETIF_F_GSO_MASK;
5219         }
5220
5221         /* The code below restricts offload features for some tunneled and
5222          * Q-in-Q packets.
5223          * Offload features for normal (non tunnel) packets are unchanged.
5224          */
5225         features = vlan_features_check(skb, features);
5226         if (!skb->encapsulation ||
5227             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5228                 return features;
5229
5230         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5231          * should disable tunnel offload features if it's not a VxLAN packet,
5232          * as tunnel offloads have been enabled only for VxLAN. This is done to
5233          * allow other tunneled traffic like GRE to work fine while VxLAN
5234          * offloads are configured in Skyhawk-R.
5235          */
5236         switch (vlan_get_protocol(skb)) {
5237         case htons(ETH_P_IP):
5238                 l4_hdr = ip_hdr(skb)->protocol;
5239                 break;
5240         case htons(ETH_P_IPV6):
5241                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5242                 break;
5243         default:
5244                 return features;
5245         }
5246
5247         if (l4_hdr != IPPROTO_UDP ||
5248             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5249             skb->inner_protocol != htons(ETH_P_TEB) ||
5250             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5251                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5252             !adapter->vxlan_port ||
5253             udp_hdr(skb)->dest != adapter->vxlan_port)
5254                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5255
5256         return features;
5257 }
5258
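/* Report a physical port id built from the HBA port number followed by the
 * controller serial number words, most-significant word first.
 */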
5259 static int be_get_phys_port_id(struct net_device *dev,
5260                                struct netdev_phys_item_id *ppid)
5261 {
5262         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5263         struct be_adapter *adapter = netdev_priv(dev);
5264         u8 *id;
5265
5266         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5267                 return -ENOSPC;
5268
5269         ppid->id[0] = adapter->hba_port_num + 1;
5270         id = &ppid->id[1];
5271         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5272              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5273                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5274
5275         ppid->id_len = id_len;
5276
5277         return 0;
5278 }
5279
5280 static void be_set_rx_mode(struct net_device *dev)
5281 {
5282         struct be_adapter *adapter = netdev_priv(dev);
5283         struct be_cmd_work *work;
5284
5285         work = be_alloc_work(adapter, be_work_set_rx_mode);
5286         if (work)
5287                 queue_work(be_wq, &work->work);
5288 }
5289
5290 static const struct net_device_ops be_netdev_ops = {
5291         .ndo_open               = be_open,
5292         .ndo_stop               = be_close,
5293         .ndo_start_xmit         = be_xmit,
5294         .ndo_set_rx_mode        = be_set_rx_mode,
5295         .ndo_set_mac_address    = be_mac_addr_set,
5296         .ndo_get_stats64        = be_get_stats64,
5297         .ndo_validate_addr      = eth_validate_addr,
5298         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5299         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5300         .ndo_set_vf_mac         = be_set_vf_mac,
5301         .ndo_set_vf_vlan        = be_set_vf_vlan,
5302         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5303         .ndo_get_vf_config      = be_get_vf_config,
5304         .ndo_set_vf_link_state  = be_set_vf_link_state,
5305         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5306         .ndo_tx_timeout         = be_tx_timeout,
5307 #ifdef CONFIG_NET_POLL_CONTROLLER
5308         .ndo_poll_controller    = be_netpoll,
5309 #endif
5310         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5311         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5312         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5313         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5314         .ndo_features_check     = be_features_check,
5315         .ndo_get_phys_port_id   = be_get_phys_port_id,
5316 };
5317
5318 static void be_netdev_init(struct net_device *netdev)
5319 {
5320         struct be_adapter *adapter = netdev_priv(netdev);
5321
5322         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5323                 NETIF_F_GSO_UDP_TUNNEL |
5324                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5325                 NETIF_F_HW_VLAN_CTAG_TX;
5326         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5327                 netdev->hw_features |= NETIF_F_RXHASH;
5328
5329         netdev->features |= netdev->hw_features |
5330                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5331
5332         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5333                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5334
5335         netdev->priv_flags |= IFF_UNICAST_FLT;
5336
5337         netdev->flags |= IFF_MULTICAST;
5338
5339         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5340
5341         netdev->netdev_ops = &be_netdev_ops;
5342
5343         netdev->ethtool_ops = &be_ethtool_ops;
5344
5345         /* MTU range: 256 - 9000 */
5346         netdev->min_mtu = BE_MIN_MTU;
5347         netdev->max_mtu = BE_MAX_MTU;
5348 }
5349
5350 static void be_cleanup(struct be_adapter *adapter)
5351 {
5352         struct net_device *netdev = adapter->netdev;
5353
5354         rtnl_lock();
5355         netif_device_detach(netdev);
5356         if (netif_running(netdev))
5357                 be_close(netdev);
5358         rtnl_unlock();
5359
5360         be_clear(adapter);
5361 }
5362
5363 static int be_resume(struct be_adapter *adapter)
5364 {
5365         struct net_device *netdev = adapter->netdev;
5366         int status;
5367
5368         status = be_setup(adapter);
5369         if (status)
5370                 return status;
5371
5372         rtnl_lock();
5373         if (netif_running(netdev))
5374                 status = be_open(netdev);
5375         rtnl_unlock();
5376
5377         if (status)
5378                 return status;
5379
5380         netif_device_attach(netdev);
5381
5382         return 0;
5383 }
5384
5385 static void be_soft_reset(struct be_adapter *adapter)
5386 {
5387         u32 val;
5388
5389         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5390         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5391         val |= SLIPORT_SOFTRESET_SR_MASK;
5392         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5393 }
5394
5395 static bool be_err_is_recoverable(struct be_adapter *adapter)
5396 {
5397         struct be_error_recovery *err_rec = &adapter->error_recovery;
5398         unsigned long initial_idle_time =
5399                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5400         unsigned long recovery_interval =
5401                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5402         u16 ue_err_code;
5403         u32 val;
5404
5405         val = be_POST_stage_get(adapter);
5406         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5407                 return false;
5408         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5409         if (ue_err_code == 0)
5410                 return false;
5411
5412         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5413                 ue_err_code);
5414
5415         if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5416                 dev_err(&adapter->pdev->dev,
5417                         "Cannot recover within %lu sec from driver load\n",
5418                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5419                 return false;
5420         }
5421
5422         if (err_rec->last_recovery_time && time_before_eq(
5423                 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5424                 dev_err(&adapter->pdev->dev,
5425                         "Cannot recover within %lu sec from last recovery\n",
5426                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5427                 return false;
5428         }
5429
5430         if (ue_err_code == err_rec->last_err_code) {
5431                 dev_err(&adapter->pdev->dev,
5432                         "Cannot recover from a consecutive TPE error\n");
5433                 return false;
5434         }
5435
5436         err_rec->last_recovery_time = jiffies;
5437         err_rec->last_err_code = ue_err_code;
5438         return true;
5439 }
5440
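/* Drive the TPE recovery state machine:
 * NONE -> DETECT -> RESET (PF0 only) -> PRE_POLL -> re-init.
 * Returns -EAGAIN while the caller must reschedule (resched_delay says when),
 * 0 once the chip is ready to be re-initialized, and a negative error when
 * recovery is not possible.
 */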
5441 static int be_tpe_recover(struct be_adapter *adapter)
5442 {
5443         struct be_error_recovery *err_rec = &adapter->error_recovery;
5444         int status = -EAGAIN;
5445         u32 val;
5446
5447         switch (err_rec->recovery_state) {
5448         case ERR_RECOVERY_ST_NONE:
5449                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5450                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5451                 break;
5452
5453         case ERR_RECOVERY_ST_DETECT:
5454                 val = be_POST_stage_get(adapter);
5455                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5456                     POST_STAGE_RECOVERABLE_ERR) {
5457                         dev_err(&adapter->pdev->dev,
5458                                 "Unrecoverable HW error detected: 0x%x\n", val);
5459                         status = -EINVAL;
5460                         err_rec->resched_delay = 0;
5461                         break;
5462                 }
5463
5464                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5465
5466                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5467                  * milliseconds before it checks for final error status in
5468                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5469                  * If they are, PF0 initiates a Soft Reset.
5470                  */
5471                 if (adapter->pf_num == 0) {
5472                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5473                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5474                                         ERR_RECOVERY_UE_DETECT_DURATION;
5475                         break;
5476                 }
5477
5478                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5479                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5480                                         ERR_RECOVERY_UE_DETECT_DURATION;
5481                 break;
5482
5483         case ERR_RECOVERY_ST_RESET:
5484                 if (!be_err_is_recoverable(adapter)) {
5485                         dev_err(&adapter->pdev->dev,
5486                                 "Failed to meet recovery criteria\n");
5487                         status = -EIO;
5488                         err_rec->resched_delay = 0;
5489                         break;
5490                 }
5491                 be_soft_reset(adapter);
5492                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5493                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5494                                         err_rec->ue_to_reset_time;
5495                 break;
5496
5497         case ERR_RECOVERY_ST_PRE_POLL:
5498                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5499                 err_rec->resched_delay = 0;
5500                 status = 0;                     /* done */
5501                 break;
5502
5503         default:
5504                 status = -EINVAL;
5505                 err_rec->resched_delay = 0;
5506                 break;
5507         }
5508
5509         return status;
5510 }
5511
5512 static int be_err_recover(struct be_adapter *adapter)
5513 {
5514         int status;
5515
5516         if (!lancer_chip(adapter)) {
5517                 if (!adapter->error_recovery.recovery_supported ||
5518                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5519                         return -EIO;
5520                 status = be_tpe_recover(adapter);
5521                 if (status)
5522                         goto err;
5523         }
5524
5525         /* Wait for adapter to reach quiescent state before
5526          * destroying queues
5527          */
5528         status = be_fw_wait_ready(adapter);
5529         if (status)
5530                 goto err;
5531
5532         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5533
5534         be_cleanup(adapter);
5535
5536         status = be_resume(adapter);
5537         if (status)
5538                 goto err;
5539
5540         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5541
5542 err:
5543         return status;
5544 }
5545
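/* Error-detection work: look for HW errors, attempt recovery and decide,
 * based on the chip type and the recovery state machine's progress, whether
 * and how soon to reschedule itself.
 */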
5546 static void be_err_detection_task(struct work_struct *work)
5547 {
5548         struct be_error_recovery *err_rec =
5549                         container_of(work, struct be_error_recovery,
5550                                      err_detection_work.work);
5551         struct be_adapter *adapter =
5552                         container_of(err_rec, struct be_adapter,
5553                                      error_recovery);
5554         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5555         struct device *dev = &adapter->pdev->dev;
5556         int recovery_status;
5557
5558         be_detect_error(adapter);
5559         if (!be_check_error(adapter, BE_ERROR_HW))
5560                 goto reschedule_task;
5561
5562         recovery_status = be_err_recover(adapter);
5563         if (!recovery_status) {
5564                 err_rec->recovery_retries = 0;
5565                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5566                 dev_info(dev, "Adapter recovery successful\n");
5567                 goto reschedule_task;
5568         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5569                 /* BEx/SH recovery state machine */
5570                 if (adapter->pf_num == 0 &&
5571                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5572                         dev_err(&adapter->pdev->dev,
5573                                 "Adapter recovery in progress\n");
5574                 resched_delay = err_rec->resched_delay;
5575                 goto reschedule_task;
5576         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5577                 /* For VFs, check every second whether the PF has
5578                  * allocated resources.
5579                  */
5580                 dev_err(dev, "Re-trying adapter recovery\n");
5581                 goto reschedule_task;
5582         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5583                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5584                 /* In case of another error during recovery, it takes 30 sec
5585                  * for the adapter to come out of error. Retry error recovery after
5586                  * this time interval.
5587                  */
5588                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5589                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5590                 goto reschedule_task;
5591         } else {
5592                 dev_err(dev, "Adapter recovery failed\n");
5593                 dev_err(dev, "Please reboot server to recover\n");
5594         }
5595
5596         return;
5597
5598 reschedule_task:
5599         be_schedule_err_detection(adapter, resched_delay);
5600 }
5601
5602 static void be_log_sfp_info(struct be_adapter *adapter)
5603 {
5604         int status;
5605
5606         status = be_cmd_query_sfp_info(adapter);
5607         if (!status) {
5608                 dev_err(&adapter->pdev->dev,
5609                         "Port %c: %s Vendor: %s part no: %s\n",
5610                         adapter->port_name,
5611                         be_misconfig_evt_port_state[adapter->phy_state],
5612                         adapter->phy.vendor_name,
5613                         adapter->phy.vendor_pn);
5614         }
5615         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5616 }
5617
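/* Periodic (1 second) housekeeping: read the die temperature on the PF,
 * reap MCC completions while the interface is down, kick off stats commands,
 * replenish starved RX queues, update EQ delays (except on Skyhawk) and log
 * SFP info after a PHY misconfiguration event.
 */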
5618 static void be_worker(struct work_struct *work)
5619 {
5620         struct be_adapter *adapter =
5621                 container_of(work, struct be_adapter, work.work);
5622         struct be_rx_obj *rxo;
5623         int i;
5624
5625         if (be_physfn(adapter) &&
5626             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5627                 be_cmd_get_die_temperature(adapter);
5628
5629         /* when interrupts are not yet enabled, just reap any pending
5630          * mcc completions
5631          */
5632         if (!netif_running(adapter->netdev)) {
5633                 be_process_mcc(adapter);
5634                 goto reschedule;
5635         }
5636
5637         if (!adapter->stats_cmd_sent) {
5638                 if (lancer_chip(adapter))
5639                         lancer_cmd_get_pport_stats(adapter,
5640                                                    &adapter->stats_cmd);
5641                 else
5642                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5643         }
5644
5645         for_all_rx_queues(adapter, rxo, i) {
5646                 /* Replenish RX-queues starved due to memory
5647                  * allocation failures.
5648                  */
5649                 if (rxo->rx_post_starved)
5650                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5651         }
5652
5653         /* EQ-delay update for Skyhawk is done while notifying EQ */
5654         if (!skyhawk_chip(adapter))
5655                 be_eqd_update(adapter, false);
5656
5657         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5658                 be_log_sfp_info(adapter);
5659
5660 reschedule:
5661         adapter->work_counter++;
5662         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5663 }
5664
5665 static void be_unmap_pci_bars(struct be_adapter *adapter)
5666 {
5667         if (adapter->csr)
5668                 pci_iounmap(adapter->pdev, adapter->csr);
5669         if (adapter->db)
5670                 pci_iounmap(adapter->pdev, adapter->db);
5671         if (adapter->pcicfg && adapter->pcicfg_mapped)
5672                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5673 }
5674
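/* Doorbell BAR selection: Lancer chips and virtual functions expose the
 * doorbells in BAR 0, while BE2/BE3/Skyhawk physical functions use BAR 4.
 */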
5675 static int db_bar(struct be_adapter *adapter)
5676 {
5677         if (lancer_chip(adapter) || be_virtfn(adapter))
5678                 return 0;
5679         else
5680                 return 4;
5681 }
5682
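/* On Skyhawk, record the location and size of the doorbell BAR region that
 * is used by the RoCE driver.
 */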
5683 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5684 {
5685         if (skyhawk_chip(adapter)) {
5686                 adapter->roce_db.size = 4096;
5687                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5688                                                               db_bar(adapter));
5689                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5690                                                                db_bar(adapter));
5691         }
5692         return 0;
5693 }
5694
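/* Map the PCI BARs used by the driver. The SLI_INTF register is read first
 * to determine the SLI family and whether this function is a VF. The CSR BAR
 * is mapped only on BEx PFs, the doorbell BAR is always mapped, and PCICFG is
 * either mapped separately (PFs on BEx/Skyhawk) or located at a fixed offset
 * within the doorbell mapping (VFs).
 */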
5695 static int be_map_pci_bars(struct be_adapter *adapter)
5696 {
5697         struct pci_dev *pdev = adapter->pdev;
5698         u8 __iomem *addr;
5699         u32 sli_intf;
5700
5701         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5702         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5703                                 SLI_INTF_FAMILY_SHIFT;
5704         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5705
5706         if (BEx_chip(adapter) && be_physfn(adapter)) {
5707                 adapter->csr = pci_iomap(pdev, 2, 0);
5708                 if (!adapter->csr)
5709                         return -ENOMEM;
5710         }
5711
5712         addr = pci_iomap(pdev, db_bar(adapter), 0);
5713         if (!addr)
5714                 goto pci_map_err;
5715         adapter->db = addr;
5716
5717         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5718                 if (be_physfn(adapter)) {
5719                         /* PCICFG is the 2nd BAR in BE2 */
5720                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5721                         if (!addr)
5722                                 goto pci_map_err;
5723                         adapter->pcicfg = addr;
5724                         adapter->pcicfg_mapped = true;
5725                 } else {
5726                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5727                         adapter->pcicfg_mapped = false;
5728                 }
5729         }
5730
5731         be_roce_map_pci_bars(adapter);
5732         return 0;
5733
5734 pci_map_err:
5735         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5736         be_unmap_pci_bars(adapter);
5737         return -ENOMEM;
5738 }
5739
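/* Free the DMA-coherent buffers (mailbox, RX-filter cmd and stats cmd)
 * allocated by be_drv_init().
 */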
5740 static void be_drv_cleanup(struct be_adapter *adapter)
5741 {
5742         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5743         struct device *dev = &adapter->pdev->dev;
5744
5745         if (mem->va)
5746                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5747
5748         mem = &adapter->rx_filter;
5749         if (mem->va)
5750                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5751
5752         mem = &adapter->stats_cmd;
5753         if (mem->va)
5754                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5755 }
5756
5757 /* Allocate and initialize various fields in be_adapter struct */
5758 static int be_drv_init(struct be_adapter *adapter)
5759 {
5760         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5761         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5762         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5763         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5764         struct device *dev = &adapter->pdev->dev;
5765         int status = 0;
5766
5767         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5768         mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5769                                                 &mbox_mem_alloc->dma,
5770                                                 GFP_KERNEL);
5771         if (!mbox_mem_alloc->va)
5772                 return -ENOMEM;
5773
5774         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5775         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5776         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5777
5778         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5779         rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5780                                            &rx_filter->dma, GFP_KERNEL);
5781         if (!rx_filter->va) {
5782                 status = -ENOMEM;
5783                 goto free_mbox;
5784         }
5785
5786         if (lancer_chip(adapter))
5787                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5788         else if (BE2_chip(adapter))
5789                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5790         else if (BE3_chip(adapter))
5791                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5792         else
5793                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5794         stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5795                                            &stats_cmd->dma, GFP_KERNEL);
5796         if (!stats_cmd->va) {
5797                 status = -ENOMEM;
5798                 goto free_rx_filter;
5799         }
5800
5801         mutex_init(&adapter->mbox_lock);
5802         mutex_init(&adapter->mcc_lock);
5803         mutex_init(&adapter->rx_filter_lock);
5804         spin_lock_init(&adapter->mcc_cq_lock);
5805         init_completion(&adapter->et_cmd_compl);
5806
5807         pci_save_state(adapter->pdev);
5808
5809         INIT_DELAYED_WORK(&adapter->work, be_worker);
5810
5811         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5812         adapter->error_recovery.resched_delay = 0;
5813         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5814                           be_err_detection_task);
5815
5816         adapter->rx_fc = true;
5817         adapter->tx_fc = true;
5818
5819         /* Must be a power of 2 or else MODULO will BUG_ON */
5820         adapter->be_get_temp_freq = 64;
5821
5822         INIT_LIST_HEAD(&adapter->vxlan_port_list);
5823         return 0;
5824
5825 free_rx_filter:
5826         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5827 free_mbox:
5828         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5829                           mbox_mem_alloc->dma);
5830         return status;
5831 }
5832
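/* Device removal: detach the RoCE device, disable interrupts, stop error
 * detection, unregister the netdev and tear down adapter resources. The
 * function is reset only when no VFs are still assigned to guests, and the
 * PCI resources are released last.
 */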
5833 static void be_remove(struct pci_dev *pdev)
5834 {
5835         struct be_adapter *adapter = pci_get_drvdata(pdev);
5836
5837         if (!adapter)
5838                 return;
5839
5840         be_roce_dev_remove(adapter);
5841         be_intr_set(adapter, false);
5842
5843         be_cancel_err_detection(adapter);
5844
5845         unregister_netdev(adapter->netdev);
5846
5847         be_clear(adapter);
5848
5849         if (!pci_vfs_assigned(adapter->pdev))
5850                 be_cmd_reset_function(adapter);
5851
5852         /* Tell FW we are done issuing cmds */
5853         be_cmd_fw_clean(adapter);
5854
5855         be_unmap_pci_bars(adapter);
5856         be_drv_cleanup(adapter);
5857
5858         pci_disable_pcie_error_reporting(pdev);
5859
5860         pci_release_regions(pdev);
5861         pci_disable_device(pdev);
5862
5863         free_netdev(adapter->netdev);
5864 }
5865
5866 static ssize_t be_hwmon_show_temp(struct device *dev,
5867                                   struct device_attribute *dev_attr,
5868                                   char *buf)
5869 {
5870         struct be_adapter *adapter = dev_get_drvdata(dev);
5871
5872         /* Unit: millidegree Celsius */
5873         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5874                 return -EIO;
5875         else
5876                 return sprintf(buf, "%u\n",
5877                                adapter->hwmon_info.be_on_die_temp * 1000);
5878 }
5879
5880 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5881                           be_hwmon_show_temp, NULL, 1);
5882
5883 static struct attribute *be_hwmon_attrs[] = {
5884         &sensor_dev_attr_temp1_input.dev_attr.attr,
5885         NULL
5886 };
5887
5888 ATTRIBUTE_GROUPS(be_hwmon);
5889
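/* Return a printable name for the adapter's multi-channel mode, or an empty
 * string when no multi-channel mode is configured.
 */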
5890 static char *mc_name(struct be_adapter *adapter)
5891 {
5892         char *str = ""; /* default */
5893
5894         switch (adapter->mc_type) {
5895         case UMC:
5896                 str = "UMC";
5897                 break;
5898         case FLEX10:
5899                 str = "FLEX10";
5900                 break;
5901         case vNIC1:
5902                 str = "vNIC-1";
5903                 break;
5904         case nPAR:
5905                 str = "nPAR";
5906                 break;
5907         case UFP:
5908                 str = "UFP";
5909                 break;
5910         case vNIC2:
5911                 str = "vNIC-2";
5912                 break;
5913         default:
5914                 str = "";
5915         }
5916
5917         return str;
5918 }
5919
5920 static inline char *func_name(struct be_adapter *adapter)
5921 {
5922         return be_physfn(adapter) ? "PF" : "VF";
5923 }
5924
5925 static inline char *nic_name(struct pci_dev *pdev)
5926 {
5927         switch (pdev->device) {
5928         case OC_DEVICE_ID1:
5929                 return OC_NAME;
5930         case OC_DEVICE_ID2:
5931                 return OC_NAME_BE;
5932         case OC_DEVICE_ID3:
5933         case OC_DEVICE_ID4:
5934                 return OC_NAME_LANCER;
5935         case BE_DEVICE_ID2:
5936                 return BE3_NAME;
5937         case OC_DEVICE_ID5:
5938         case OC_DEVICE_ID6:
5939                 return OC_NAME_SH;
5940         default:
5941                 return BE_NAME;
5942         }
5943 }
5944
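/* PCI probe: enable the device, set up the DMA mask, map the BARs, allocate
 * the driver's DMA buffers (be_drv_init), bring the adapter up (be_setup),
 * register the netdev and finally start error detection and, on PFs, hwmon
 * registration.
 */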
5945 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5946 {
5947         struct be_adapter *adapter;
5948         struct net_device *netdev;
5949         int status = 0;
5950
5951         status = pci_enable_device(pdev);
5952         if (status)
5953                 goto do_none;
5954
5955         status = pci_request_regions(pdev, DRV_NAME);
5956         if (status)
5957                 goto disable_dev;
5958         pci_set_master(pdev);
5959
5960         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5961         if (!netdev) {
5962                 status = -ENOMEM;
5963                 goto rel_reg;
5964         }
5965         adapter = netdev_priv(netdev);
5966         adapter->pdev = pdev;
5967         pci_set_drvdata(pdev, adapter);
5968         adapter->netdev = netdev;
5969         SET_NETDEV_DEV(netdev, &pdev->dev);
5970
5971         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5972         if (!status) {
5973                 netdev->features |= NETIF_F_HIGHDMA;
5974         } else {
5975                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5976                 if (status) {
5977                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5978                         goto free_netdev;
5979                 }
5980         }
5981
5982         status = pci_enable_pcie_error_reporting(pdev);
5983         if (!status)
5984                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5985
5986         status = be_map_pci_bars(adapter);
5987         if (status)
5988                 goto free_netdev;
5989
5990         status = be_drv_init(adapter);
5991         if (status)
5992                 goto unmap_bars;
5993
5994         status = be_setup(adapter);
5995         if (status)
5996                 goto drv_cleanup;
5997
5998         be_netdev_init(netdev);
5999         status = register_netdev(netdev);
6000         if (status != 0)
6001                 goto unsetup;
6002
6003         be_roce_dev_add(adapter);
6004
6005         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6006         adapter->error_recovery.probe_time = jiffies;
6007
6008         /* On-die temperature is not supported for VFs. */
6009         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
6010                 adapter->hwmon_info.hwmon_dev =
6011                         devm_hwmon_device_register_with_groups(&pdev->dev,
6012                                                                DRV_NAME,
6013                                                                adapter,
6014                                                                be_hwmon_groups);
6015                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
6016         }
6017
6018         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
6019                  func_name(adapter), mc_name(adapter), adapter->port_name);
6020
6021         return 0;
6022
6023 unsetup:
6024         be_clear(adapter);
6025 drv_cleanup:
6026         be_drv_cleanup(adapter);
6027 unmap_bars:
6028         be_unmap_pci_bars(adapter);
6029 free_netdev:
6030         free_netdev(netdev);
6031 rel_reg:
6032         pci_release_regions(pdev);
6033 disable_dev:
6034         pci_disable_device(pdev);
6035 do_none:
6036         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6037         return status;
6038 }
6039
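/* Legacy PCI power-management hooks: be_suspend() quiesces the adapter and
 * powers the device down; be_pci_resume() re-enables the device, rebuilds the
 * adapter via be_resume() and reschedules error detection.
 */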
6040 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6041 {
6042         struct be_adapter *adapter = pci_get_drvdata(pdev);
6043
6044         be_intr_set(adapter, false);
6045         be_cancel_err_detection(adapter);
6046
6047         be_cleanup(adapter);
6048
6049         pci_save_state(pdev);
6050         pci_disable_device(pdev);
6051         pci_set_power_state(pdev, pci_choose_state(pdev, state));
6052         return 0;
6053 }
6054
6055 static int be_pci_resume(struct pci_dev *pdev)
6056 {
6057         struct be_adapter *adapter = pci_get_drvdata(pdev);
6058         int status = 0;
6059
6060         status = pci_enable_device(pdev);
6061         if (status)
6062                 return status;
6063
6064         pci_restore_state(pdev);
6065
6066         status = be_resume(adapter);
6067         if (status)
6068                 return status;
6069
6070         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6071
6072         return 0;
6073 }
6074
6075 /*
6076  * An FLR will stop BE from DMAing any data.
6077  */
6078 static void be_shutdown(struct pci_dev *pdev)
6079 {
6080         struct be_adapter *adapter = pci_get_drvdata(pdev);
6081
6082         if (!adapter)
6083                 return;
6084
6085         be_roce_dev_shutdown(adapter);
6086         cancel_delayed_work_sync(&adapter->work);
6087         be_cancel_err_detection(adapter);
6088
6089         netif_device_detach(adapter->netdev);
6090
6091         be_cmd_reset_function(adapter);
6092
6093         pci_disable_device(pdev);
6094 }
6095
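/* PCI EEH/AER error handlers: on a detected error the adapter is quiesced and
 * the device disabled; after a slot reset the driver waits for FW readiness
 * before declaring recovery; on resume the adapter is rebuilt and error
 * detection is rescheduled.
 */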
6096 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6097                                             pci_channel_state_t state)
6098 {
6099         struct be_adapter *adapter = pci_get_drvdata(pdev);
6100
6101         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6102
6103         be_roce_dev_remove(adapter);
6104
6105         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6106                 be_set_error(adapter, BE_ERROR_EEH);
6107
6108                 be_cancel_err_detection(adapter);
6109
6110                 be_cleanup(adapter);
6111         }
6112
6113         if (state == pci_channel_io_perm_failure)
6114                 return PCI_ERS_RESULT_DISCONNECT;
6115
6116         pci_disable_device(pdev);
6117
6118         /* The error could cause the FW to trigger a flash debug dump.
6119          * Resetting the card while a flash dump is in progress
6120          * can cause it not to recover; wait for the dump to finish.
6121          * Only the first function needs to wait, since the wait is
6122          * needed only once per adapter.
6123          */
6124         if (pdev->devfn == 0)
6125                 ssleep(30);
6126
6127         return PCI_ERS_RESULT_NEED_RESET;
6128 }
6129
6130 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6131 {
6132         struct be_adapter *adapter = pci_get_drvdata(pdev);
6133         int status;
6134
6135         dev_info(&adapter->pdev->dev, "EEH reset\n");
6136
6137         status = pci_enable_device(pdev);
6138         if (status)
6139                 return PCI_ERS_RESULT_DISCONNECT;
6140
6141         pci_set_master(pdev);
6142         pci_restore_state(pdev);
6143
6144         /* Check if card is ok and fw is ready */
6145         dev_info(&adapter->pdev->dev,
6146                  "Waiting for FW to be ready after EEH reset\n");
6147         status = be_fw_wait_ready(adapter);
6148         if (status)
6149                 return PCI_ERS_RESULT_DISCONNECT;
6150
6151         be_clear_error(adapter, BE_CLEAR_ALL);
6152         return PCI_ERS_RESULT_RECOVERED;
6153 }
6154
6155 static void be_eeh_resume(struct pci_dev *pdev)
6156 {
6157         int status = 0;
6158         struct be_adapter *adapter = pci_get_drvdata(pdev);
6159
6160         dev_info(&adapter->pdev->dev, "EEH resume\n");
6161
6162         pci_save_state(pdev);
6163
6164         status = be_resume(adapter);
6165         if (status)
6166                 goto err;
6167
6168         be_roce_dev_add(adapter);
6169
6170         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6171         return;
6172 err:
6173         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6174 }
6175
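/* Entry point for the sriov_numvfs sysfs attribute: enables or disables
 * num_vfs VFs. On Skyhawk the PF-pool resources are redistributed first,
 * since (unlike Lancer) the FW does not do this on its own.
 */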
6176 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6177 {
6178         struct be_adapter *adapter = pci_get_drvdata(pdev);
6179         struct be_resources vft_res = {0};
6180         int status;
6181
6182         if (!num_vfs)
6183                 be_vf_clear(adapter);
6184
6185         adapter->num_vfs = num_vfs;
6186
6187         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6188                 dev_warn(&pdev->dev,
6189                          "Cannot disable VFs while they are assigned\n");
6190                 return -EBUSY;
6191         }
6192
6193         /* When the HW is in an SRIOV capable configuration, the PF-pool resources
6194          * are distributed equally across the maximum number of VFs. The user may
6195          * request that only a subset of the max VFs be enabled.
6196          * Based on num_vfs, redistribute the resources across num_vfs so that
6197          * each VF gets access to a larger share of resources.
6198          * This facility is not available in BE3 FW.
6199          * On Lancer chips, this redistribution is done by the FW.
6200          */
6201         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6202                 be_calculate_vf_res(adapter, adapter->num_vfs,
6203                                     &vft_res);
6204                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6205                                                  adapter->num_vfs, &vft_res);
6206                 if (status)
6207                         dev_err(&pdev->dev,
6208                                 "Failed to optimize SR-IOV resources\n");
6209         }
6210
6211         status = be_get_resources(adapter);
6212         if (status)
6213                 return be_cmd_status(status);
6214
6215         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6216         rtnl_lock();
6217         status = be_update_queues(adapter);
6218         rtnl_unlock();
6219         if (status)
6220                 return be_cmd_status(status);
6221
6222         if (adapter->num_vfs)
6223                 status = be_vf_setup(adapter);
6224
6225         if (!status)
6226                 return adapter->num_vfs;
6227
6228         return 0;
6229 }
6230
6231 static const struct pci_error_handlers be_eeh_handlers = {
6232         .error_detected = be_eeh_err_detected,
6233         .slot_reset = be_eeh_reset,
6234         .resume = be_eeh_resume,
6235 };
6236
6237 static struct pci_driver be_driver = {
6238         .name = DRV_NAME,
6239         .id_table = be_dev_ids,
6240         .probe = be_probe,
6241         .remove = be_remove,
6242         .suspend = be_suspend,
6243         .resume = be_pci_resume,
6244         .shutdown = be_shutdown,
6245         .sriov_configure = be_pci_sriov_configure,
6246         .err_handler = &be_eeh_handlers
6247 };
6248
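/* Module init: validate the rx_frag_size parameter, warn about the obsolete
 * num_vfs parameter, create the shared workqueues and register the PCI
 * driver.
 */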
6249 static int __init be_init_module(void)
6250 {
6251         int status;
6252
6253         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6254             rx_frag_size != 2048) {
6255                 printk(KERN_WARNING DRV_NAME
6256                         " : Module param rx_frag_size must be 2048/4096/8192."
6257                         " Using 2048\n");
6258                 rx_frag_size = 2048;
6259         }
6260
6261         if (num_vfs > 0) {
6262                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6263                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6264         }
6265
6266         be_wq = create_singlethread_workqueue("be_wq");
6267         if (!be_wq) {
6268                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6269                 return -ENOMEM;
6270         }
6271
6272         be_err_recovery_workq =
6273                 create_singlethread_workqueue("be_err_recover");
6274         if (!be_err_recovery_workq)
6275                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6276
6277         status = pci_register_driver(&be_driver);
6278         if (status) {
6279                 destroy_workqueue(be_wq);
6280                 be_destroy_err_recovery_workq();
6281         }
6282         return status;
6283 }
6284 module_init(be_init_module);
6285
6286 static void __exit be_exit_module(void)
6287 {
6288         pci_unregister_driver(&be_driver);
6289
6290         be_destroy_err_recovery_workq();
6291
6292         if (be_wq)
6293                 destroy_workqueue(be_wq);
6294 }
6295 module_exit(be_exit_module);