Linux-libre 4.11.5-gnu
[librecmc/linux-libre.git] drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
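/* Free the DMA-coherent memory backing a queue's ring, if it was allocated */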
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
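/* Allocate zeroed DMA-coherent memory for a ring of @len entries of
 * @entry_size bytes each and initialize the queue bookkeeping.
 */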
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
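/* Enable/disable the host interrupt bit in the MEMBAR interrupt control
 * register via PCI config space; the register is only written when the
 * current state differs from the requested one.
 */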
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
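/* Enable/disable interrupts via a FW cmd (be_cmd_intr_set()), falling back
 * to the PCI config register if the cmd fails. No-op on Lancer and when an
 * EEH error has been detected.
 */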
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On Lancer, interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
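/* Ring the RX queue doorbell to tell HW how many buffers were posted */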
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
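/* Ring the TX queue doorbell to tell HW how many WRBs were posted */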
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
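/* Ring the event queue doorbell: optionally re-arm the EQ and clear the
 * interrupt, ack @num_popped events and encode the EQ delay multiplier.
 */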
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
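/* Program @mac as the interface's primary MAC (pmac_id[0]); if the MAC is
 * already present in the uc-list, reuse that entry instead of adding it again.
 */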
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279                         /* mac already added, skip addition */
280                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281                         return 0;
282                 }
283         }
284
285         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286                                &adapter->pmac_id[0], 0);
287 }
288
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291         int i;
292
293         /* Skip deletion if the programmed mac is
294          * being used in uc-list
295          */
296         for (i = 0; i < adapter->uc_macs; i++) {
297                 if (adapter->pmac_id[i + 1] == pmac_id)
298                         return;
299         }
300         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302
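/* Handler for changing the interface MAC address. Programs the new MAC on
 * the interface and then verifies with the FW that it actually became the
 * active MAC (a VF without the needed privilege may be refused).
 */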
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305         struct be_adapter *adapter = netdev_priv(netdev);
306         struct device *dev = &adapter->pdev->dev;
307         struct sockaddr *addr = p;
308         int status;
309         u8 mac[ETH_ALEN];
310         u32 old_pmac_id = adapter->pmac_id[0];
311
312         if (!is_valid_ether_addr(addr->sa_data))
313                 return -EADDRNOTAVAIL;
314
315         /* Proceed further only if the user-provided MAC is different
316          * from the active MAC
317          */
318         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319                 return 0;
320
321         /* BE3 VFs without FILTMGMT privilege are not allowed to set their
322          * MAC address
323          */
324         if (BEx_chip(adapter) && be_virtfn(adapter) &&
325             !check_privilege(adapter, BE_PRIV_FILTMGMT))
326                 return -EPERM;
327
328         /* if device is not running, copy MAC to netdev->dev_addr */
329         if (!netif_running(netdev))
330                 goto done;
331
332         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
333          * privilege or if PF did not provision the new MAC address.
334          * On BE3, this cmd will always fail if the VF doesn't have the
335          * FILTMGMT privilege. This failure is OK only if the PF programmed
336          * the MAC for the VF.
337          */
338         mutex_lock(&adapter->rx_filter_lock);
339         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
340         if (!status) {
341
342                 /* Delete the old programmed MAC. This call may fail if the
343                  * old MAC was already deleted by the PF driver.
344                  */
345                 if (adapter->pmac_id[0] != old_pmac_id)
346                         be_dev_mac_del(adapter, old_pmac_id);
347         }
348
349         mutex_unlock(&adapter->rx_filter_lock);
350         /* Only after querying the FW can we decide whether the new MAC
351          * was successfully activated
352          */
353         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354                                        adapter->if_handle, true, 0);
355         if (status)
356                 goto err;
357
358         /* The MAC change did not happen, either due to lack of privilege
359          * or because the PF didn't pre-provision the new MAC.
360          */
361         if (!ether_addr_equal(addr->sa_data, mac)) {
362                 status = -EPERM;
363                 goto err;
364         }
365
366         /* Remember currently programmed MAC */
367         ether_addr_copy(adapter->dev_mac, addr->sa_data);
368 done:
369         ether_addr_copy(netdev->dev_addr, addr->sa_data);
370         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
371         return 0;
372 err:
373         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
374         return status;
375 }
376
377 /* BE2 supports only v0 cmd */
378 static void *hw_stats_from_cmd(struct be_adapter *adapter)
379 {
380         if (BE2_chip(adapter)) {
381                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
382
383                 return &cmd->hw_stats;
384         } else if (BE3_chip(adapter)) {
385                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
386
387                 return &cmd->hw_stats;
388         } else {
389                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
390
391                 return &cmd->hw_stats;
392         }
393 }
394
395 /* BE2 supports only v0 cmd */
396 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
397 {
398         if (BE2_chip(adapter)) {
399                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
400
401                 return &hw_stats->erx;
402         } else if (BE3_chip(adapter)) {
403                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
404
405                 return &hw_stats->erx;
406         } else {
407                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
408
409                 return &hw_stats->erx;
410         }
411 }
412
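/* The populate_be_vX_stats() helpers below copy the FW stats response
 * (after LE-to-CPU conversion) into the driver's be_drv_stats structure;
 * which version is used depends on the chip (BE2 -> v0, BE3 -> v1, else v2).
 */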
413 static void populate_be_v0_stats(struct be_adapter *adapter)
414 {
415         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
416         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
418         struct be_port_rxf_stats_v0 *port_stats =
419                                         &rxf_stats->port[adapter->port_num];
420         struct be_drv_stats *drvs = &adapter->drv_stats;
421
422         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423         drvs->rx_pause_frames = port_stats->rx_pause_frames;
424         drvs->rx_crc_errors = port_stats->rx_crc_errors;
425         drvs->rx_control_frames = port_stats->rx_control_frames;
426         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
427         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
428         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
429         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
430         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
431         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
432         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
433         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
434         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
435         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
436         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
437         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
438         drvs->rx_dropped_header_too_small =
439                 port_stats->rx_dropped_header_too_small;
440         drvs->rx_address_filtered =
441                                         port_stats->rx_address_filtered +
442                                         port_stats->rx_vlan_filtered;
443         drvs->rx_alignment_symbol_errors =
444                 port_stats->rx_alignment_symbol_errors;
445
446         drvs->tx_pauseframes = port_stats->tx_pauseframes;
447         drvs->tx_controlframes = port_stats->tx_controlframes;
448
449         if (adapter->port_num)
450                 drvs->jabber_events = rxf_stats->port1_jabber_events;
451         else
452                 drvs->jabber_events = rxf_stats->port0_jabber_events;
453         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
454         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
455         drvs->forwarded_packets = rxf_stats->forwarded_packets;
456         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
457         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
458         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
459         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
460 }
461
462 static void populate_be_v1_stats(struct be_adapter *adapter)
463 {
464         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
465         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
466         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
467         struct be_port_rxf_stats_v1 *port_stats =
468                                         &rxf_stats->port[adapter->port_num];
469         struct be_drv_stats *drvs = &adapter->drv_stats;
470
471         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
472         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
473         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
474         drvs->rx_pause_frames = port_stats->rx_pause_frames;
475         drvs->rx_crc_errors = port_stats->rx_crc_errors;
476         drvs->rx_control_frames = port_stats->rx_control_frames;
477         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
478         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
479         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
480         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
481         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
482         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
483         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
484         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
485         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
486         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
487         drvs->rx_dropped_header_too_small =
488                 port_stats->rx_dropped_header_too_small;
489         drvs->rx_input_fifo_overflow_drop =
490                 port_stats->rx_input_fifo_overflow_drop;
491         drvs->rx_address_filtered = port_stats->rx_address_filtered;
492         drvs->rx_alignment_symbol_errors =
493                 port_stats->rx_alignment_symbol_errors;
494         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
495         drvs->tx_pauseframes = port_stats->tx_pauseframes;
496         drvs->tx_controlframes = port_stats->tx_controlframes;
497         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
498         drvs->jabber_events = port_stats->jabber_events;
499         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
500         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
501         drvs->forwarded_packets = rxf_stats->forwarded_packets;
502         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
503         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
504         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
505         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
506 }
507
508 static void populate_be_v2_stats(struct be_adapter *adapter)
509 {
510         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
511         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
512         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
513         struct be_port_rxf_stats_v2 *port_stats =
514                                         &rxf_stats->port[adapter->port_num];
515         struct be_drv_stats *drvs = &adapter->drv_stats;
516
517         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
518         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
519         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
520         drvs->rx_pause_frames = port_stats->rx_pause_frames;
521         drvs->rx_crc_errors = port_stats->rx_crc_errors;
522         drvs->rx_control_frames = port_stats->rx_control_frames;
523         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
524         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
525         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
526         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
527         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
528         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
529         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
530         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
531         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
532         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
533         drvs->rx_dropped_header_too_small =
534                 port_stats->rx_dropped_header_too_small;
535         drvs->rx_input_fifo_overflow_drop =
536                 port_stats->rx_input_fifo_overflow_drop;
537         drvs->rx_address_filtered = port_stats->rx_address_filtered;
538         drvs->rx_alignment_symbol_errors =
539                 port_stats->rx_alignment_symbol_errors;
540         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
541         drvs->tx_pauseframes = port_stats->tx_pauseframes;
542         drvs->tx_controlframes = port_stats->tx_controlframes;
543         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
544         drvs->jabber_events = port_stats->jabber_events;
545         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
546         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
547         drvs->forwarded_packets = rxf_stats->forwarded_packets;
548         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
549         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
550         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
551         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
552         if (be_roce_supported(adapter)) {
553                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
554                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
555                 drvs->rx_roce_frames = port_stats->roce_frames_received;
556                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
557                 drvs->roce_drops_payload_len =
558                         port_stats->roce_drops_payload_len;
559         }
560 }
561
562 static void populate_lancer_stats(struct be_adapter *adapter)
563 {
564         struct be_drv_stats *drvs = &adapter->drv_stats;
565         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
566
567         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
568         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
569         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
570         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
571         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
572         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
573         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
574         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
575         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
576         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
577         drvs->rx_dropped_tcp_length =
578                                 pport_stats->rx_dropped_invalid_tcp_length;
579         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
580         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
581         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
582         drvs->rx_dropped_header_too_small =
583                                 pport_stats->rx_dropped_header_too_small;
584         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
585         drvs->rx_address_filtered =
586                                         pport_stats->rx_address_filtered +
587                                         pport_stats->rx_vlan_filtered;
588         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
589         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
590         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
591         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
592         drvs->jabber_events = pport_stats->rx_jabbers;
593         drvs->forwarded_packets = pport_stats->num_forwards_lo;
594         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
595         drvs->rx_drops_too_many_frags =
596                                 pport_stats->rx_drops_too_many_frags_lo;
597 }
598
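/* Accumulate a 16-bit HW counter that wraps at 65535 into a 32-bit
 * software accumulator, accounting for at most one wrap since the last read.
 */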
599 static void accumulate_16bit_val(u32 *acc, u16 val)
600 {
601 #define lo(x)                   (x & 0xFFFF)
602 #define hi(x)                   (x & 0xFFFF0000)
603         bool wrapped = val < lo(*acc);
604         u32 newacc = hi(*acc) + val;
605
606         if (wrapped)
607                 newacc += 65536;
608         ACCESS_ONCE(*acc) = newacc;
609 }
610
611 static void populate_erx_stats(struct be_adapter *adapter,
612                                struct be_rx_obj *rxo, u32 erx_stat)
613 {
614         if (!BEx_chip(adapter))
615                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
616         else
617                 /* The erx HW counter below can wrap around after 65535.
618                  * The driver accumulates it into a 32-bit value.
619                  */
620                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
621                                      (u16)erx_stat);
622 }
623
624 void be_parse_stats(struct be_adapter *adapter)
625 {
626         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
627         struct be_rx_obj *rxo;
628         int i;
629         u32 erx_stat;
630
631         if (lancer_chip(adapter)) {
632                 populate_lancer_stats(adapter);
633         } else {
634                 if (BE2_chip(adapter))
635                         populate_be_v0_stats(adapter);
636                 else if (BE3_chip(adapter))
637                         /* for BE3 */
638                         populate_be_v1_stats(adapter);
639                 else
640                         populate_be_v2_stats(adapter);
641
642                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
643                 for_all_rx_queues(adapter, rxo, i) {
644                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
645                         populate_erx_stats(adapter, rxo, erx_stat);
646                 }
647         }
648 }
649
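/* Handler for fetching interface statistics: aggregate the per-RX/TX-queue
 * packet and byte counters (read consistently via u64_stats sync) and fill
 * in the error counters from the driver stats parsed from FW.
 */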
650 static void be_get_stats64(struct net_device *netdev,
651                            struct rtnl_link_stats64 *stats)
652 {
653         struct be_adapter *adapter = netdev_priv(netdev);
654         struct be_drv_stats *drvs = &adapter->drv_stats;
655         struct be_rx_obj *rxo;
656         struct be_tx_obj *txo;
657         u64 pkts, bytes;
658         unsigned int start;
659         int i;
660
661         for_all_rx_queues(adapter, rxo, i) {
662                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
663
664                 do {
665                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
666                         pkts = rx_stats(rxo)->rx_pkts;
667                         bytes = rx_stats(rxo)->rx_bytes;
668                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
669                 stats->rx_packets += pkts;
670                 stats->rx_bytes += bytes;
671                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
672                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
673                                         rx_stats(rxo)->rx_drops_no_frags;
674         }
675
676         for_all_tx_queues(adapter, txo, i) {
677                 const struct be_tx_stats *tx_stats = tx_stats(txo);
678
679                 do {
680                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
681                         pkts = tx_stats(txo)->tx_pkts;
682                         bytes = tx_stats(txo)->tx_bytes;
683                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
684                 stats->tx_packets += pkts;
685                 stats->tx_bytes += bytes;
686         }
687
688         /* bad pkts received */
689         stats->rx_errors = drvs->rx_crc_errors +
690                 drvs->rx_alignment_symbol_errors +
691                 drvs->rx_in_range_errors +
692                 drvs->rx_out_range_errors +
693                 drvs->rx_frame_too_long +
694                 drvs->rx_dropped_too_small +
695                 drvs->rx_dropped_too_short +
696                 drvs->rx_dropped_header_too_small +
697                 drvs->rx_dropped_tcp_length +
698                 drvs->rx_dropped_runt;
699
700         /* detailed rx errors */
701         stats->rx_length_errors = drvs->rx_in_range_errors +
702                 drvs->rx_out_range_errors +
703                 drvs->rx_frame_too_long;
704
705         stats->rx_crc_errors = drvs->rx_crc_errors;
706
707         /* frame alignment errors */
708         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
709
710         /* receiver fifo overrun */
711         /* drops_no_pbuf is not per i/f, it's per BE card */
712         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
713                                 drvs->rx_input_fifo_overflow_drop +
714                                 drvs->rx_drops_no_pbuf;
715 }
716
717 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
718 {
719         struct net_device *netdev = adapter->netdev;
720
721         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
722                 netif_carrier_off(netdev);
723                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
724         }
725
726         if (link_status)
727                 netif_carrier_on(netdev);
728         else
729                 netif_carrier_off(netdev);
730
731         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
732 }
733
734 static int be_gso_hdr_len(struct sk_buff *skb)
735 {
736         if (skb->encapsulation)
737                 return skb_inner_transport_offset(skb) +
738                        inner_tcp_hdrlen(skb);
739         return skb_transport_offset(skb) + tcp_hdrlen(skb);
740 }
741
742 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
743 {
744         struct be_tx_stats *stats = tx_stats(txo);
745         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
746         /* Account for headers which get duplicated in TSO pkt */
747         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
748
749         u64_stats_update_begin(&stats->sync);
750         stats->tx_reqs++;
751         stats->tx_bytes += skb->len + dup_hdr_len;
752         stats->tx_pkts += tx_pkts;
753         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
754                 stats->tx_vxlan_offload_pkts += tx_pkts;
755         u64_stats_update_end(&stats->sync);
756 }
757
758 /* Returns number of WRBs needed for the skb */
759 static u32 skb_wrb_cnt(struct sk_buff *skb)
760 {
761         /* +1 for the header wrb */
762         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
763 }
764
765 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
766 {
767         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
768         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
769         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
770         wrb->rsvd0 = 0;
771 }
772
773 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
774  * to avoid the swap and shift/mask operations in wrb_fill().
775  */
776 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
777 {
778         wrb->frag_pa_hi = 0;
779         wrb->frag_pa_lo = 0;
780         wrb->frag_len = 0;
781         wrb->rsvd0 = 0;
782 }
783
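/* Return the VLAN tag to use for a Tx skb; if the skb's priority is not in
 * the available priority bitmap, substitute the recommended priority bits.
 */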
784 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
785                                      struct sk_buff *skb)
786 {
787         u8 vlan_prio;
788         u16 vlan_tag;
789
790         vlan_tag = skb_vlan_tag_get(skb);
791         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
792         /* If vlan priority provided by OS is NOT in available bmap */
793         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
794                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
795                                 adapter->recommended_prio_bits;
796
797         return vlan_tag;
798 }
799
800 /* Used only for IP tunnel packets */
801 static u16 skb_inner_ip_proto(struct sk_buff *skb)
802 {
803         return (inner_ip_hdr(skb)->version == 4) ?
804                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
805 }
806
807 static u16 skb_ip_proto(struct sk_buff *skb)
808 {
809         return (ip_hdr(skb)->version == 4) ?
810                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
811 }
812
813 static inline bool be_is_txq_full(struct be_tx_obj *txo)
814 {
815         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
816 }
817
818 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
819 {
820         return atomic_read(&txo->q.used) < txo->q.len / 2;
821 }
822
823 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
824 {
825         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
826 }
827
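/* Translate the skb's offload requests (TSO, checksum, VLAN) into the WRB
 * feature flags and fields consumed by wrb_fill_hdr().
 */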
828 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
829                                        struct sk_buff *skb,
830                                        struct be_wrb_params *wrb_params)
831 {
832         u16 proto;
833
834         if (skb_is_gso(skb)) {
835                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
836                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
837                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
838                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
839         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
840                 if (skb->encapsulation) {
841                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
842                         proto = skb_inner_ip_proto(skb);
843                 } else {
844                         proto = skb_ip_proto(skb);
845                 }
846                 if (proto == IPPROTO_TCP)
847                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
848                 else if (proto == IPPROTO_UDP)
849                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
850         }
851
852         if (skb_vlan_tag_present(skb)) {
853                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
854                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
855         }
856
857         BE_WRB_F_SET(wrb_params->features, CRC, 1);
858 }
859
860 static void wrb_fill_hdr(struct be_adapter *adapter,
861                          struct be_eth_hdr_wrb *hdr,
862                          struct be_wrb_params *wrb_params,
863                          struct sk_buff *skb)
864 {
865         memset(hdr, 0, sizeof(*hdr));
866
867         SET_TX_WRB_HDR_BITS(crc, hdr,
868                             BE_WRB_F_GET(wrb_params->features, CRC));
869         SET_TX_WRB_HDR_BITS(ipcs, hdr,
870                             BE_WRB_F_GET(wrb_params->features, IPCS));
871         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
872                             BE_WRB_F_GET(wrb_params->features, TCPCS));
873         SET_TX_WRB_HDR_BITS(udpcs, hdr,
874                             BE_WRB_F_GET(wrb_params->features, UDPCS));
875
876         SET_TX_WRB_HDR_BITS(lso, hdr,
877                             BE_WRB_F_GET(wrb_params->features, LSO));
878         SET_TX_WRB_HDR_BITS(lso6, hdr,
879                             BE_WRB_F_GET(wrb_params->features, LSO6));
880         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
881
882         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
883          * hack is not needed, the evt bit is set while ringing DB.
884          */
885         SET_TX_WRB_HDR_BITS(event, hdr,
886                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
887         SET_TX_WRB_HDR_BITS(vlan, hdr,
888                             BE_WRB_F_GET(wrb_params->features, VLAN));
889         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
890
891         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
892         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
893         SET_TX_WRB_HDR_BITS(mgmt, hdr,
894                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
895 }
896
897 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
898                           bool unmap_single)
899 {
900         dma_addr_t dma;
901         u32 frag_len = le32_to_cpu(wrb->frag_len);
902
903
904         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
905                 (u64)le32_to_cpu(wrb->frag_pa_lo);
906         if (frag_len) {
907                 if (unmap_single)
908                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
909                 else
910                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
911         }
912 }
913
914 /* Grab a WRB header for xmit */
915 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
916 {
917         u32 head = txo->q.head;
918
919         queue_head_inc(&txo->q);
920         return head;
921 }
922
923 /* Set up the WRB header for xmit */
924 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
925                                 struct be_tx_obj *txo,
926                                 struct be_wrb_params *wrb_params,
927                                 struct sk_buff *skb, u16 head)
928 {
929         u32 num_frags = skb_wrb_cnt(skb);
930         struct be_queue_info *txq = &txo->q;
931         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
932
933         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
934         be_dws_cpu_to_le(hdr, sizeof(*hdr));
935
936         BUG_ON(txo->sent_skb_list[head]);
937         txo->sent_skb_list[head] = skb;
938         txo->last_req_hdr = head;
939         atomic_add(num_frags, &txq->used);
940         txo->last_req_wrb_cnt = num_frags;
941         txo->pend_wrb_cnt += num_frags;
942 }
943
944 /* Setup a WRB fragment (buffer descriptor) for xmit */
945 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
946                                  int len)
947 {
948         struct be_eth_wrb *wrb;
949         struct be_queue_info *txq = &txo->q;
950
951         wrb = queue_head_node(txq);
952         wrb_fill(wrb, busaddr, len);
953         queue_head_inc(txq);
954 }
955
956 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
957  * was invoked. The producer index is restored to the previous packet and the
958  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
959  */
960 static void be_xmit_restore(struct be_adapter *adapter,
961                             struct be_tx_obj *txo, u32 head, bool map_single,
962                             u32 copied)
963 {
964         struct device *dev;
965         struct be_eth_wrb *wrb;
966         struct be_queue_info *txq = &txo->q;
967
968         dev = &adapter->pdev->dev;
969         txq->head = head;
970
971         /* skip the first wrb (hdr); it's not mapped */
972         queue_head_inc(txq);
973         while (copied) {
974                 wrb = queue_head_node(txq);
975                 unmap_tx_frag(dev, wrb, map_single);
976                 map_single = false;
977                 copied -= le32_to_cpu(wrb->frag_len);
978                 queue_head_inc(txq);
979         }
980
981         txq->head = head;
982 }
983
984 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
985  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
986  * of WRBs used up by the packet.
987  */
988 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
989                            struct sk_buff *skb,
990                            struct be_wrb_params *wrb_params)
991 {
992         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
993         struct device *dev = &adapter->pdev->dev;
994         struct be_queue_info *txq = &txo->q;
995         bool map_single = false;
996         u32 head = txq->head;
997         dma_addr_t busaddr;
998         int len;
999
1000         head = be_tx_get_wrb_hdr(txo);
1001
1002         if (skb->len > skb->data_len) {
1003                 len = skb_headlen(skb);
1004
1005                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1006                 if (dma_mapping_error(dev, busaddr))
1007                         goto dma_err;
1008                 map_single = true;
1009                 be_tx_setup_wrb_frag(txo, busaddr, len);
1010                 copied += len;
1011         }
1012
1013         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1014                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1015                 len = skb_frag_size(frag);
1016
1017                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1018                 if (dma_mapping_error(dev, busaddr))
1019                         goto dma_err;
1020                 be_tx_setup_wrb_frag(txo, busaddr, len);
1021                 copied += len;
1022         }
1023
1024         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1025
1026         be_tx_stats_update(txo, skb);
1027         return wrb_cnt;
1028
1029 dma_err:
1030         adapter->drv_stats.dma_map_errors++;
1031         be_xmit_restore(adapter, txo, head, map_single, copied);
1032         return 0;
1033 }
1034
1035 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1036 {
1037         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1038 }
1039
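/* Insert the VLAN tag (and the outer QnQ VLAN, if any) into the packet in
 * software; used when HW VLAN tagging must be skipped as a workaround.
 */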
1040 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1041                                              struct sk_buff *skb,
1042                                              struct be_wrb_params
1043                                              *wrb_params)
1044 {
1045         u16 vlan_tag = 0;
1046
1047         skb = skb_share_check(skb, GFP_ATOMIC);
1048         if (unlikely(!skb))
1049                 return skb;
1050
1051         if (skb_vlan_tag_present(skb))
1052                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1053
1054         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1055                 if (!vlan_tag)
1056                         vlan_tag = adapter->pvid;
1057                 /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W
1058                  * to skip VLAN insertion
1059                  */
1060                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1061         }
1062
1063         if (vlan_tag) {
1064                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1065                                                 vlan_tag);
1066                 if (unlikely(!skb))
1067                         return skb;
1068                 skb->vlan_tci = 0;
1069         }
1070
1071         /* Insert the outer VLAN, if any */
1072         if (adapter->qnq_vid) {
1073                 vlan_tag = adapter->qnq_vid;
1074                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1075                                                 vlan_tag);
1076                 if (unlikely(!skb))
1077                         return skb;
1078                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1079         }
1080
1081         return skb;
1082 }
1083
1084 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1085 {
1086         struct ethhdr *eh = (struct ethhdr *)skb->data;
1087         u16 offset = ETH_HLEN;
1088
1089         if (eh->h_proto == htons(ETH_P_IPV6)) {
1090                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1091
1092                 offset += sizeof(struct ipv6hdr);
1093                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1094                     ip6h->nexthdr != NEXTHDR_UDP) {
1095                         struct ipv6_opt_hdr *ehdr =
1096                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1097
1098                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1099                         if (ehdr->hdrlen == 0xff)
1100                                 return true;
1101                 }
1102         }
1103         return false;
1104 }
1105
1106 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1107 {
1108         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1109 }
1110
1111 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1112 {
1113         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1114 }
1115
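/* Apply BEx/Lancer-specific Tx workarounds: trim padded runt packets,
 * insert VLAN tags in software where HW tagging is buggy, and drop ipv6
 * packets that could stall the HW. Returns NULL if the skb was dropped.
 */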
1116 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1117                                                   struct sk_buff *skb,
1118                                                   struct be_wrb_params
1119                                                   *wrb_params)
1120 {
1121         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1122         unsigned int eth_hdr_len;
1123         struct iphdr *ip;
1124
1125         /* For padded packets, BE HW modifies tot_len field in IP header
1126          * incorrectly when the VLAN tag is inserted by HW.
1127          * For padded packets, Lancer computes incorrect checksum.
1128          */
1129         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1130                                                 VLAN_ETH_HLEN : ETH_HLEN;
1131         if (skb->len <= 60 &&
1132             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1133             is_ipv4_pkt(skb)) {
1134                 ip = (struct iphdr *)ip_hdr(skb);
1135                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1136         }
1137
1138         /* If vlan tag is already inlined in the packet, skip HW VLAN
1139          * tagging in pvid-tagging mode
1140          */
1141         if (be_pvid_tagging_enabled(adapter) &&
1142             veh->h_vlan_proto == htons(ETH_P_8021Q))
1143                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1144
1145         /* HW has a bug wherein it will calculate CSUM for VLAN
1146          * pkts even though checksum offload is disabled.
1147          * Manually insert the VLAN in the pkt.
1148          */
1149         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1150             skb_vlan_tag_present(skb)) {
1151                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1152                 if (unlikely(!skb))
1153                         goto err;
1154         }
1155
1156         /* HW may lock up when VLAN HW tagging is requested on
1157          * certain ipv6 packets. Drop such pkts if the HW workaround to
1158          * skip HW tagging is not enabled by FW.
1159          */
1160         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1161                      (adapter->pvid || adapter->qnq_vid) &&
1162                      !qnq_async_evt_rcvd(adapter)))
1163                 goto tx_drop;
1164
1165         /* Manual VLAN tag insertion to prevent an ASIC lockup when
1166          * the ASIC inserts a VLAN tag into certain ipv6 packets.
1167          * Insert VLAN tags in the driver, and set the event,
1168          * completion and vlan bits accordingly
1169          * in the Tx WRB.
1170          */
1171         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1172             be_vlan_tag_tx_chk(adapter, skb)) {
1173                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1174                 if (unlikely(!skb))
1175                         goto err;
1176         }
1177
1178         return skb;
1179 tx_drop:
1180         dev_kfree_skb_any(skb);
1181 err:
1182         return NULL;
1183 }
1184
1185 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1186                                            struct sk_buff *skb,
1187                                            struct be_wrb_params *wrb_params)
1188 {
1189         int err;
1190
1191         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1192          * packets that are 32 bytes or less may cause a transmit stall
1193          * on that port. The workaround is to pad such packets
1194          * (len <= 32 bytes) to a minimum length of 36 bytes.
1195          */
1196         if (skb->len <= 32) {
1197                 if (skb_put_padto(skb, 36))
1198                         return NULL;
1199         }
1200
1201         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1202                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1203                 if (!skb)
1204                         return NULL;
1205         }
1206
1207         /* The stack can send us skbs with length greater than
1208          * what the HW can handle. Trim the extra bytes.
1209          */
1210         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1211         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1212         WARN_ON(err);
1213
1214         return skb;
1215 }
1216
1217 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1218 {
1219         struct be_queue_info *txq = &txo->q;
1220         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1221
1222         /* Mark the last request eventable if it hasn't been marked already */
1223         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1224                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1225
1226         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1227         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1228                 wrb_fill_dummy(queue_head_node(txq));
1229                 queue_head_inc(txq);
1230                 atomic_inc(&txq->used);
1231                 txo->pend_wrb_cnt++;
1232                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1233                                            TX_HDR_WRB_NUM_SHIFT);
1234                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1235                                           TX_HDR_WRB_NUM_SHIFT);
1236         }
1237         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1238         txo->pend_wrb_cnt = 0;
1239 }
1240
1241 /* OS2BMC related */
1242
1243 #define DHCP_CLIENT_PORT        68
1244 #define DHCP_SERVER_PORT        67
1245 #define NET_BIOS_PORT1          137
1246 #define NET_BIOS_PORT2          138
1247 #define DHCPV6_RAS_PORT         547
1248
1249 #define is_mc_allowed_on_bmc(adapter, eh)       \
1250         (!is_multicast_filt_enabled(adapter) && \
1251          is_multicast_ether_addr(eh->h_dest) && \
1252          !is_broadcast_ether_addr(eh->h_dest))
1253
1254 #define is_bc_allowed_on_bmc(adapter, eh)       \
1255         (!is_broadcast_filt_enabled(adapter) && \
1256          is_broadcast_ether_addr(eh->h_dest))
1257
1258 #define is_arp_allowed_on_bmc(adapter, skb)     \
1259         (is_arp(skb) && is_arp_filt_enabled(adapter))
1260
1261 #define is_broadcast_packet(eh, adapter)        \
1262                 (is_multicast_ether_addr(eh->h_dest) && \
1263                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1264
1265 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1266
1267 #define is_arp_filt_enabled(adapter)    \
1268                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1269
1270 #define is_dhcp_client_filt_enabled(adapter)    \
1271                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1272
1273 #define is_dhcp_srvr_filt_enabled(adapter)      \
1274                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1275
1276 #define is_nbios_filt_enabled(adapter)  \
1277                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1278
1279 #define is_ipv6_na_filt_enabled(adapter)        \
1280                 (adapter->bmc_filt_mask &       \
1281                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1282
1283 #define is_ipv6_ra_filt_enabled(adapter)        \
1284                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1285
1286 #define is_ipv6_ras_filt_enabled(adapter)       \
1287                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1288
1289 #define is_broadcast_filt_enabled(adapter)      \
1290                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1291
1292 #define is_multicast_filt_enabled(adapter)      \
1293                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1294
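/* Decide whether the packet should also be sent to the BMC, based on the
 * adapter's OS2BMC filter mask (broadcast/multicast, ARP, DHCP, NetBIOS and
 * IPv6 ND/RA traffic). May insert the VLAN tag into the skb.
 */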
1295 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1296                                struct sk_buff **skb)
1297 {
1298         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1299         bool os2bmc = false;
1300
1301         if (!be_is_os2bmc_enabled(adapter))
1302                 goto done;
1303
1304         if (!is_multicast_ether_addr(eh->h_dest))
1305                 goto done;
1306
1307         if (is_mc_allowed_on_bmc(adapter, eh) ||
1308             is_bc_allowed_on_bmc(adapter, eh) ||
1309             is_arp_allowed_on_bmc(adapter, (*skb))) {
1310                 os2bmc = true;
1311                 goto done;
1312         }
1313
1314         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1315                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1316                 u8 nexthdr = hdr->nexthdr;
1317
1318                 if (nexthdr == IPPROTO_ICMPV6) {
1319                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1320
1321                         switch (icmp6->icmp6_type) {
1322                         case NDISC_ROUTER_ADVERTISEMENT:
1323                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1324                                 goto done;
1325                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1326                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1327                                 goto done;
1328                         default:
1329                                 break;
1330                         }
1331                 }
1332         }
1333
1334         if (is_udp_pkt((*skb))) {
1335                 struct udphdr *udp = udp_hdr((*skb));
1336
1337                 switch (ntohs(udp->dest)) {
1338                 case DHCP_CLIENT_PORT:
1339                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1340                         goto done;
1341                 case DHCP_SERVER_PORT:
1342                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1343                         goto done;
1344                 case NET_BIOS_PORT1:
1345                 case NET_BIOS_PORT2:
1346                         os2bmc = is_nbios_filt_enabled(adapter);
1347                         goto done;
1348                 case DHCPV6_RAS_PORT:
1349                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1350                         goto done;
1351                 default:
1352                         break;
1353                 }
1354         }
1355 done:
1356         /* For vlan packets destined to the BMC, the asic expects the
1357          * vlan tag to be inline in the packet.
1358          */
1359         if (os2bmc)
1360                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1361
1362         return os2bmc;
1363 }
1364
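/* Transmit entry point: apply Tx workarounds, map the skb and post its WRBs
 * on the selected Tx queue, optionally enqueue a copy for the BMC, and ring
 * the doorbell unless more packets are expected (xmit_more).
 */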
1365 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1366 {
1367         struct be_adapter *adapter = netdev_priv(netdev);
1368         u16 q_idx = skb_get_queue_mapping(skb);
1369         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1370         struct be_wrb_params wrb_params = { 0 };
1371         bool flush = !skb->xmit_more;
1372         u16 wrb_cnt;
1373
1374         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1375         if (unlikely(!skb))
1376                 goto drop;
1377
1378         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1379
1380         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1381         if (unlikely(!wrb_cnt)) {
1382                 dev_kfree_skb_any(skb);
1383                 goto drop;
1384         }
1385
1386         /* If OS2BMC is enabled and the pkt is destined to the BMC,
1387          * enqueue the pkt a second time with the mgmt bit set.
1388          */
1389         if (be_send_pkt_to_bmc(adapter, &skb)) {
1390                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1391                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1392                 if (unlikely(!wrb_cnt))
1393                         goto drop;
1394                 else
1395                         skb_get(skb);
1396         }
1397
1398         if (be_is_txq_full(txo)) {
1399                 netif_stop_subqueue(netdev, q_idx);
1400                 tx_stats(txo)->tx_stops++;
1401         }
1402
1403         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1404                 be_xmit_flush(adapter, txo);
1405
1406         return NETDEV_TX_OK;
1407 drop:
1408         tx_stats(txo)->tx_drv_drops++;
1409         /* Flush the already enqueued tx requests */
1410         if (flush && txo->pend_wrb_cnt)
1411                 be_xmit_flush(adapter, txo);
1412
1413         return NETDEV_TX_OK;
1414 }
1415
1416 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1417 {
1418         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1419                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1420 }
1421
1422 static int be_set_vlan_promisc(struct be_adapter *adapter)
1423 {
1424         struct device *dev = &adapter->pdev->dev;
1425         int status;
1426
1427         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1428                 return 0;
1429
1430         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1431         if (!status) {
1432                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1433                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1434         } else {
1435                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1436         }
1437         return status;
1438 }
1439
1440 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1441 {
1442         struct device *dev = &adapter->pdev->dev;
1443         int status;
1444
1445         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1446         if (!status) {
1447                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1448                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1449         }
1450         return status;
1451 }
1452
1453 /*
1454  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1455  * If the user configures more, place BE in vlan promiscuous mode.
1456  */
1457 static int be_vid_config(struct be_adapter *adapter)
1458 {
1459         struct device *dev = &adapter->pdev->dev;
1460         u16 vids[BE_NUM_VLANS_SUPPORTED];
1461         u16 num = 0, i = 0;
1462         int status = 0;
1463
1464         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1465         if (adapter->netdev->flags & IFF_PROMISC)
1466                 return 0;
1467
1468         if (adapter->vlans_added > be_max_vlans(adapter))
1469                 return be_set_vlan_promisc(adapter);
1470
1471         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1472                 status = be_clear_vlan_promisc(adapter);
1473                 if (status)
1474                         return status;
1475         }
1476         /* Construct VLAN Table to give to HW */
1477         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1478                 vids[num++] = cpu_to_le16(i);
1479
1480         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1481         if (status) {
1482                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1483                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1484                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1485                     addl_status(status) ==
1486                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1487                         return be_set_vlan_promisc(adapter);
1488         }
1489         return status;
1490 }
1491
1492 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1493 {
1494         struct be_adapter *adapter = netdev_priv(netdev);
1495         int status = 0;
1496
1497         mutex_lock(&adapter->rx_filter_lock);
1498
1499         /* Packets with VID 0 are always received by Lancer by default */
1500         if (lancer_chip(adapter) && vid == 0)
1501                 goto done;
1502
1503         if (test_bit(vid, adapter->vids))
1504                 goto done;
1505
1506         set_bit(vid, adapter->vids);
1507         adapter->vlans_added++;
1508
1509         status = be_vid_config(adapter);
1510 done:
1511         mutex_unlock(&adapter->rx_filter_lock);
1512         return status;
1513 }
1514
1515 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1516 {
1517         struct be_adapter *adapter = netdev_priv(netdev);
1518         int status = 0;
1519
1520         mutex_lock(&adapter->rx_filter_lock);
1521
1522         /* Packets with VID 0 are always received by Lancer by default */
1523         if (lancer_chip(adapter) && vid == 0)
1524                 goto done;
1525
1526         if (!test_bit(vid, adapter->vids))
1527                 goto done;
1528
1529         clear_bit(vid, adapter->vids);
1530         adapter->vlans_added--;
1531
1532         status = be_vid_config(adapter);
1533 done:
1534         mutex_unlock(&adapter->rx_filter_lock);
1535         return status;
1536 }
1537
1538 static void be_set_all_promisc(struct be_adapter *adapter)
1539 {
1540         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1541         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1542 }
1543
1544 static void be_set_mc_promisc(struct be_adapter *adapter)
1545 {
1546         int status;
1547
1548         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1549                 return;
1550
1551         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1552         if (!status)
1553                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1554 }
1555
1556 static void be_set_uc_promisc(struct be_adapter *adapter)
1557 {
1558         int status;
1559
1560         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1561                 return;
1562
1563         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1564         if (!status)
1565                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1566 }
1567
1568 static void be_clear_uc_promisc(struct be_adapter *adapter)
1569 {
1570         int status;
1571
1572         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1573                 return;
1574
1575         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1576         if (!status)
1577                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1578 }
1579
1580 /* The two functions below are the callback args for __dev_mc_sync()/__dev_uc_sync().
1581  * A single callback serves both sync and unsync; it does not actually add or
1582  * remove addresses, but only flags that the uc/mc list has changed. The
1583  * entire uc/mc list is programmed in be_set_rx_mode().
1584  */
1585 static int be_uc_list_update(struct net_device *netdev,
1586                              const unsigned char *addr)
1587 {
1588         struct be_adapter *adapter = netdev_priv(netdev);
1589
1590         adapter->update_uc_list = true;
1591         return 0;
1592 }
1593
1594 static int be_mc_list_update(struct net_device *netdev,
1595                              const unsigned char *addr)
1596 {
1597         struct be_adapter *adapter = netdev_priv(netdev);
1598
1599         adapter->update_mc_list = true;
1600         return 0;
1601 }
1602
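/* Sync the netdev multicast list with the adapter. Falls back to multicast
 * promiscuous mode when IFF_ALLMULTI is set or more addresses are configured
 * than the interface supports; otherwise re-programs the cached mc-list.
 */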
1603 static void be_set_mc_list(struct be_adapter *adapter)
1604 {
1605         struct net_device *netdev = adapter->netdev;
1606         struct netdev_hw_addr *ha;
1607         bool mc_promisc = false;
1608         int status;
1609
1610         netif_addr_lock_bh(netdev);
1611         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1612
1613         if (netdev->flags & IFF_PROMISC) {
1614                 adapter->update_mc_list = false;
1615         } else if (netdev->flags & IFF_ALLMULTI ||
1616                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1617                 /* Enable multicast promisc if the number of configured
1618                  * addresses exceeds what we support
1619                  */
1620                 mc_promisc = true;
1621                 adapter->update_mc_list = false;
1622         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1623                 /* Update mc-list unconditionally if the iface was previously
1624                  * in mc-promisc mode and now is out of that mode.
1625                  */
1626                 adapter->update_mc_list = true;
1627         }
1628
1629         if (adapter->update_mc_list) {
1630                 int i = 0;
1631
1632                 /* cache the mc-list in adapter */
1633                 netdev_for_each_mc_addr(ha, netdev) {
1634                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1635                         i++;
1636                 }
1637                 adapter->mc_count = netdev_mc_count(netdev);
1638         }
1639         netif_addr_unlock_bh(netdev);
1640
1641         if (mc_promisc) {
1642                 be_set_mc_promisc(adapter);
1643         } else if (adapter->update_mc_list) {
1644                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1645                 if (!status)
1646                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1647                 else
1648                         be_set_mc_promisc(adapter);
1649
1650                 adapter->update_mc_list = false;
1651         }
1652 }
1653
1654 static void be_clear_mc_list(struct be_adapter *adapter)
1655 {
1656         struct net_device *netdev = adapter->netdev;
1657
1658         __dev_mc_unsync(netdev, NULL);
1659         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1660         adapter->mc_count = 0;
1661 }
1662
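/* Program the uc-list entry at uc_idx into the interface. If the address
 * matches adapter->dev_mac, reuse pmac_id[0] instead of adding a new entry.
 */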
1663 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1664 {
1665         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1666                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1667                 return 0;
1668         }
1669
1670         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1671                                adapter->if_handle,
1672                                &adapter->pmac_id[uc_idx + 1], 0);
1673 }
1674
1675 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1676 {
1677         if (pmac_id == adapter->pmac_id[0])
1678                 return;
1679
1680         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1681 }
1682
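/* Sync the netdev unicast list with the adapter. Switches to unicast
 * promiscuous mode when more addresses are configured than the interface
 * supports; otherwise deletes the old pmac entries and programs the new ones.
 */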
1683 static void be_set_uc_list(struct be_adapter *adapter)
1684 {
1685         struct net_device *netdev = adapter->netdev;
1686         struct netdev_hw_addr *ha;
1687         bool uc_promisc = false;
1688         int curr_uc_macs = 0, i;
1689
1690         netif_addr_lock_bh(netdev);
1691         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1692
1693         if (netdev->flags & IFF_PROMISC) {
1694                 adapter->update_uc_list = false;
1695         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1696                 uc_promisc = true;
1697                 adapter->update_uc_list = false;
1698         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1699                 /* Update uc-list unconditionally if the iface was previously
1700                  * in uc-promisc mode and now is out of that mode.
1701                  */
1702                 adapter->update_uc_list = true;
1703         }
1704
1705         if (adapter->update_uc_list) {
1706                 /* cache the uc-list in adapter array */
1707                 i = 0;
1708                 netdev_for_each_uc_addr(ha, netdev) {
1709                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1710                         i++;
1711                 }
1712                 curr_uc_macs = netdev_uc_count(netdev);
1713         }
1714         netif_addr_unlock_bh(netdev);
1715
1716         if (uc_promisc) {
1717                 be_set_uc_promisc(adapter);
1718         } else if (adapter->update_uc_list) {
1719                 be_clear_uc_promisc(adapter);
1720
1721                 for (i = 0; i < adapter->uc_macs; i++)
1722                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1723
1724                 for (i = 0; i < curr_uc_macs; i++)
1725                         be_uc_mac_add(adapter, i);
1726                 adapter->uc_macs = curr_uc_macs;
1727                 adapter->update_uc_list = false;
1728         }
1729 }
1730
1731 static void be_clear_uc_list(struct be_adapter *adapter)
1732 {
1733         struct net_device *netdev = adapter->netdev;
1734         int i;
1735
1736         __dev_uc_unsync(netdev, NULL);
1737         for (i = 0; i < adapter->uc_macs; i++)
1738                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1739
1740         adapter->uc_macs = 0;
1741 }
1742
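/* Re-program the RX filters (promiscuous state, vlan, uc and mc lists)
 * to match the current netdev flags. Runs under rx_filter_lock.
 */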
1743 static void __be_set_rx_mode(struct be_adapter *adapter)
1744 {
1745         struct net_device *netdev = adapter->netdev;
1746
1747         mutex_lock(&adapter->rx_filter_lock);
1748
1749         if (netdev->flags & IFF_PROMISC) {
1750                 if (!be_in_all_promisc(adapter))
1751                         be_set_all_promisc(adapter);
1752         } else if (be_in_all_promisc(adapter)) {
1753                 /* We need to re-program the vlan-list or clear
1754                  * vlan-promisc mode (if needed) when the interface
1755                  * comes out of promisc mode.
1756                  */
1757                 be_vid_config(adapter);
1758         }
1759
1760         be_set_uc_list(adapter);
1761         be_set_mc_list(adapter);
1762
1763         mutex_unlock(&adapter->rx_filter_lock);
1764 }
1765
1766 static void be_work_set_rx_mode(struct work_struct *work)
1767 {
1768         struct be_cmd_work *cmd_work =
1769                                 container_of(work, struct be_cmd_work, work);
1770
1771         __be_set_rx_mode(cmd_work->adapter);
1772         kfree(cmd_work);
1773 }
1774
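/* Set a new MAC address on the given VF */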
1775 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1776 {
1777         struct be_adapter *adapter = netdev_priv(netdev);
1778         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1779         int status;
1780
1781         if (!sriov_enabled(adapter))
1782                 return -EPERM;
1783
1784         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1785                 return -EINVAL;
1786
1787         /* Proceed further only if the user-provided MAC differs
1788          * from the active MAC
1789          */
1790         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1791                 return 0;
1792
1793         if (BEx_chip(adapter)) {
1794                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1795                                 vf + 1);
1796
1797                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1798                                          &vf_cfg->pmac_id, vf + 1);
1799         } else {
1800                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1801                                         vf + 1);
1802         }
1803
1804         if (status) {
1805                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1806                         mac, vf, status);
1807                 return be_cmd_status(status);
1808         }
1809
1810         ether_addr_copy(vf_cfg->mac_addr, mac);
1811
1812         return 0;
1813 }
1814
1815 static int be_get_vf_config(struct net_device *netdev, int vf,
1816                             struct ifla_vf_info *vi)
1817 {
1818         struct be_adapter *adapter = netdev_priv(netdev);
1819         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1820
1821         if (!sriov_enabled(adapter))
1822                 return -EPERM;
1823
1824         if (vf >= adapter->num_vfs)
1825                 return -EINVAL;
1826
1827         vi->vf = vf;
1828         vi->max_tx_rate = vf_cfg->tx_rate;
1829         vi->min_tx_rate = 0;
1830         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1831         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1832         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1833         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1834         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1835
1836         return 0;
1837 }
1838
1839 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1840 {
1841         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1842         u16 vids[BE_NUM_VLANS_SUPPORTED];
1843         int vf_if_id = vf_cfg->if_handle;
1844         int status;
1845
1846         /* Enable Transparent VLAN Tagging */
1847         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1848         if (status)
1849                 return status;
1850
1851         /* With TVT enabled, clear any pre-programmed VLAN filters on the VF */
1852         vids[0] = 0;
1853         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1854         if (!status)
1855                 dev_info(&adapter->pdev->dev,
1856                          "Cleared guest VLANs on VF%d", vf);
1857
1858         /* After TVT is enabled, disallow VFs to program VLAN filters */
1859         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1860                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1861                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1862                 if (!status)
1863                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1864         }
1865         return 0;
1866 }
1867
1868 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1869 {
1870         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1871         struct device *dev = &adapter->pdev->dev;
1872         int status;
1873
1874         /* Reset Transparent VLAN Tagging. */
1875         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1876                                        vf_cfg->if_handle, 0, 0);
1877         if (status)
1878                 return status;
1879
1880         /* Allow VFs to program VLAN filtering */
1881         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1882                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1883                                                   BE_PRIV_FILTMGMT, vf + 1);
1884                 if (!status) {
1885                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1886                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1887                 }
1888         }
1889
1890         dev_info(dev,
1891                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1892         return 0;
1893 }
1894
1895 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1896                           __be16 vlan_proto)
1897 {
1898         struct be_adapter *adapter = netdev_priv(netdev);
1899         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900         int status;
1901
1902         if (!sriov_enabled(adapter))
1903                 return -EPERM;
1904
1905         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1906                 return -EINVAL;
1907
1908         if (vlan_proto != htons(ETH_P_8021Q))
1909                 return -EPROTONOSUPPORT;
1910
1911         if (vlan || qos) {
1912                 vlan |= qos << VLAN_PRIO_SHIFT;
1913                 status = be_set_vf_tvt(adapter, vf, vlan);
1914         } else {
1915                 status = be_clear_vf_tvt(adapter, vf);
1916         }
1917
1918         if (status) {
1919                 dev_err(&adapter->pdev->dev,
1920                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1921                         status);
1922                 return be_cmd_status(status);
1923         }
1924
1925         vf_cfg->vlan_tag = vlan;
1926         return 0;
1927 }
1928
1929 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1930                              int min_tx_rate, int max_tx_rate)
1931 {
1932         struct be_adapter *adapter = netdev_priv(netdev);
1933         struct device *dev = &adapter->pdev->dev;
1934         int percent_rate, status = 0;
1935         u16 link_speed = 0;
1936         u8 link_status;
1937
1938         if (!sriov_enabled(adapter))
1939                 return -EPERM;
1940
1941         if (vf >= adapter->num_vfs)
1942                 return -EINVAL;
1943
1944         if (min_tx_rate)
1945                 return -EINVAL;
1946
1947         if (!max_tx_rate)
1948                 goto config_qos;
1949
1950         status = be_cmd_link_status_query(adapter, &link_speed,
1951                                           &link_status, 0);
1952         if (status)
1953                 goto err;
1954
1955         if (!link_status) {
1956                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1957                 status = -ENETDOWN;
1958                 goto err;
1959         }
1960
1961         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1962                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1963                         link_speed);
1964                 status = -EINVAL;
1965                 goto err;
1966         }
1967
1968         /* On Skyhawk the QOS setting must be done only as a % value */
1969         percent_rate = link_speed / 100;
1970         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1971                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1972                         percent_rate);
1973                 status = -EINVAL;
1974                 goto err;
1975         }
1976
1977 config_qos:
1978         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1979         if (status)
1980                 goto err;
1981
1982         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1983         return 0;
1984
1985 err:
1986         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1987                 max_tx_rate, vf);
1988         return be_cmd_status(status);
1989 }
1990
1991 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1992                                 int link_state)
1993 {
1994         struct be_adapter *adapter = netdev_priv(netdev);
1995         int status;
1996
1997         if (!sriov_enabled(adapter))
1998                 return -EPERM;
1999
2000         if (vf >= adapter->num_vfs)
2001                 return -EINVAL;
2002
2003         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2004         if (status) {
2005                 dev_err(&adapter->pdev->dev,
2006                         "Link state change on VF %d failed: %#x\n", vf, status);
2007                 return be_cmd_status(status);
2008         }
2009
2010         adapter->vf_cfg[vf].plink_tracking = link_state;
2011
2012         return 0;
2013 }
2014
2015 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2016 {
2017         struct be_adapter *adapter = netdev_priv(netdev);
2018         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2019         u8 spoofchk;
2020         int status;
2021
2022         if (!sriov_enabled(adapter))
2023                 return -EPERM;
2024
2025         if (vf >= adapter->num_vfs)
2026                 return -EINVAL;
2027
2028         if (BEx_chip(adapter))
2029                 return -EOPNOTSUPP;
2030
2031         if (enable == vf_cfg->spoofchk)
2032                 return 0;
2033
2034         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2035
2036         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2037                                        0, spoofchk);
2038         if (status) {
2039                 dev_err(&adapter->pdev->dev,
2040                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2041                 return be_cmd_status(status);
2042         }
2043
2044         vf_cfg->spoofchk = enable;
2045         return 0;
2046 }
2047
2048 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2049                           ulong now)
2050 {
2051         aic->rx_pkts_prev = rx_pkts;
2052         aic->tx_reqs_prev = tx_pkts;
2053         aic->jiffies = now;
2054 }
2055
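/* Adaptive interrupt coalescing: compute a new EQ delay for this EQ from the
 * RX/TX packet rate observed since the last update, clamped to the
 * [min_eqd, max_eqd] range. Returns the static et_eqd value when adaptive
 * mode is disabled.
 */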
2056 static int be_get_new_eqd(struct be_eq_obj *eqo)
2057 {
2058         struct be_adapter *adapter = eqo->adapter;
2059         int eqd, start;
2060         struct be_aic_obj *aic;
2061         struct be_rx_obj *rxo;
2062         struct be_tx_obj *txo;
2063         u64 rx_pkts = 0, tx_pkts = 0;
2064         ulong now;
2065         u32 pps, delta;
2066         int i;
2067
2068         aic = &adapter->aic_obj[eqo->idx];
2069         if (!aic->enable) {
2070                 if (aic->jiffies)
2071                         aic->jiffies = 0;
2072                 eqd = aic->et_eqd;
2073                 return eqd;
2074         }
2075
2076         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2077                 do {
2078                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2079                         rx_pkts += rxo->stats.rx_pkts;
2080                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2081         }
2082
2083         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2084                 do {
2085                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2086                         tx_pkts += txo->stats.tx_reqs;
2087                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2088         }
2089
2090         /* Skip if this is the first calculation or the counters wrapped around */
2091         now = jiffies;
2092         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2093             rx_pkts < aic->rx_pkts_prev ||
2094             tx_pkts < aic->tx_reqs_prev) {
2095                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2096                 return aic->prev_eqd;
2097         }
2098
2099         delta = jiffies_to_msecs(now - aic->jiffies);
2100         if (delta == 0)
2101                 return aic->prev_eqd;
2102
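        /* Combined RX+TX packet rate (pkts/sec) over the sample window;
         * roughly 4 units of delay per 15K pkts/sec, with low rates
         * (eqd < 8) mapped to zero delay.
         */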
2103         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2104                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2105         eqd = (pps / 15000) << 2;
2106
2107         if (eqd < 8)
2108                 eqd = 0;
2109         eqd = min_t(u32, eqd, aic->max_eqd);
2110         eqd = max_t(u32, eqd, aic->min_eqd);
2111
2112         be_aic_update(aic, rx_pkts, tx_pkts, now);
2113
2114         return eqd;
2115 }
2116
2117 /* For Skyhawk-R only */
2118 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2119 {
2120         struct be_adapter *adapter = eqo->adapter;
2121         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2122         ulong now = jiffies;
2123         int eqd;
2124         u32 mult_enc;
2125
2126         if (!aic->enable)
2127                 return 0;
2128
2129         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2130                 eqd = aic->prev_eqd;
2131         else
2132                 eqd = be_get_new_eqd(eqo);
2133
2134         if (eqd > 100)
2135                 mult_enc = R2I_DLY_ENC_1;
2136         else if (eqd > 60)
2137                 mult_enc = R2I_DLY_ENC_2;
2138         else if (eqd > 20)
2139                 mult_enc = R2I_DLY_ENC_3;
2140         else
2141                 mult_enc = R2I_DLY_ENC_0;
2142
2143         aic->prev_eqd = eqd;
2144
2145         return mult_enc;
2146 }
2147
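/* Re-evaluate the EQ delay for every event queue and issue a single
 * be_cmd_modify_eqd() call covering the queues whose value changed (or all
 * of them when force_update is set).
 */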
2148 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2149 {
2150         struct be_set_eqd set_eqd[MAX_EVT_QS];
2151         struct be_aic_obj *aic;
2152         struct be_eq_obj *eqo;
2153         int i, num = 0, eqd;
2154
2155         for_all_evt_queues(adapter, eqo, i) {
2156                 aic = &adapter->aic_obj[eqo->idx];
2157                 eqd = be_get_new_eqd(eqo);
2158                 if (force_update || eqd != aic->prev_eqd) {
2159                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2160                         set_eqd[num].eq_id = eqo->q.id;
2161                         aic->prev_eqd = eqd;
2162                         num++;
2163                 }
2164         }
2165
2166         if (num)
2167                 be_cmd_modify_eqd(adapter, set_eqd, num);
2168 }
2169
2170 static void be_rx_stats_update(struct be_rx_obj *rxo,
2171                                struct be_rx_compl_info *rxcp)
2172 {
2173         struct be_rx_stats *stats = rx_stats(rxo);
2174
2175         u64_stats_update_begin(&stats->sync);
2176         stats->rx_compl++;
2177         stats->rx_bytes += rxcp->pkt_size;
2178         stats->rx_pkts++;
2179         if (rxcp->tunneled)
2180                 stats->rx_vxlan_offload_pkts++;
2181         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2182                 stats->rx_mcast_pkts++;
2183         if (rxcp->err)
2184                 stats->rx_compl_err++;
2185         u64_stats_update_end(&stats->sync);
2186 }
2187
2188 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2189 {
2190         /* The L4 checksum is not reliable for non-TCP/UDP packets.
2191          * Also ignore ipcksm for IPv6 pkts
2192          */
2193         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2194                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2195 }
2196
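/* Pop the page_info at the RXQ tail. The DMA mapping is unmapped on the
 * last fragment of a page; intermediate fragments are only synced for
 * CPU access.
 */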
2197 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2198 {
2199         struct be_adapter *adapter = rxo->adapter;
2200         struct be_rx_page_info *rx_page_info;
2201         struct be_queue_info *rxq = &rxo->q;
2202         u32 frag_idx = rxq->tail;
2203
2204         rx_page_info = &rxo->page_info_tbl[frag_idx];
2205         BUG_ON(!rx_page_info->page);
2206
2207         if (rx_page_info->last_frag) {
2208                 dma_unmap_page(&adapter->pdev->dev,
2209                                dma_unmap_addr(rx_page_info, bus),
2210                                adapter->big_page_size, DMA_FROM_DEVICE);
2211                 rx_page_info->last_frag = false;
2212         } else {
2213                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2214                                         dma_unmap_addr(rx_page_info, bus),
2215                                         rx_frag_size, DMA_FROM_DEVICE);
2216         }
2217
2218         queue_tail_inc(rxq);
2219         atomic_dec(&rxq->used);
2220         return rx_page_info;
2221 }
2222
2223 /* Throw away the data in the Rx completion */
2224 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2225                                 struct be_rx_compl_info *rxcp)
2226 {
2227         struct be_rx_page_info *page_info;
2228         u16 i, num_rcvd = rxcp->num_rcvd;
2229
2230         for (i = 0; i < num_rcvd; i++) {
2231                 page_info = get_rx_page_info(rxo);
2232                 put_page(page_info->page);
2233                 memset(page_info, 0, sizeof(*page_info));
2234         }
2235 }
2236
2237 /*
2238  * skb_fill_rx_data forms a complete skb for an ether frame
2239  * indicated by rxcp.
2240  */
2241 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2242                              struct be_rx_compl_info *rxcp)
2243 {
2244         struct be_rx_page_info *page_info;
2245         u16 i, j;
2246         u16 hdr_len, curr_frag_len, remaining;
2247         u8 *start;
2248
2249         page_info = get_rx_page_info(rxo);
2250         start = page_address(page_info->page) + page_info->page_offset;
2251         prefetch(start);
2252
2253         /* Copy data in the first descriptor of this completion */
2254         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2255
2256         skb->len = curr_frag_len;
2257         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2258                 memcpy(skb->data, start, curr_frag_len);
2259                 /* Complete packet has now been moved to data */
2260                 put_page(page_info->page);
2261                 skb->data_len = 0;
2262                 skb->tail += curr_frag_len;
2263         } else {
2264                 hdr_len = ETH_HLEN;
2265                 memcpy(skb->data, start, hdr_len);
2266                 skb_shinfo(skb)->nr_frags = 1;
2267                 skb_frag_set_page(skb, 0, page_info->page);
2268                 skb_shinfo(skb)->frags[0].page_offset =
2269                                         page_info->page_offset + hdr_len;
2270                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2271                                   curr_frag_len - hdr_len);
2272                 skb->data_len = curr_frag_len - hdr_len;
2273                 skb->truesize += rx_frag_size;
2274                 skb->tail += hdr_len;
2275         }
2276         page_info->page = NULL;
2277
2278         if (rxcp->pkt_size <= rx_frag_size) {
2279                 BUG_ON(rxcp->num_rcvd != 1);
2280                 return;
2281         }
2282
2283         /* More frags present for this completion */
2284         remaining = rxcp->pkt_size - curr_frag_len;
2285         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2286                 page_info = get_rx_page_info(rxo);
2287                 curr_frag_len = min(remaining, rx_frag_size);
2288
2289                 /* Coalesce all frags from the same physical page in one slot */
2290                 if (page_info->page_offset == 0) {
2291                         /* Fresh page */
2292                         j++;
2293                         skb_frag_set_page(skb, j, page_info->page);
2294                         skb_shinfo(skb)->frags[j].page_offset =
2295                                                         page_info->page_offset;
2296                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2297                         skb_shinfo(skb)->nr_frags++;
2298                 } else {
2299                         put_page(page_info->page);
2300                 }
2301
2302                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2303                 skb->len += curr_frag_len;
2304                 skb->data_len += curr_frag_len;
2305                 skb->truesize += rx_frag_size;
2306                 remaining -= curr_frag_len;
2307                 page_info->page = NULL;
2308         }
2309         BUG_ON(j > MAX_SKB_FRAGS);
2310 }
2311
2312 /* Process the RX completion indicated by rxcp when GRO is disabled */
2313 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2314                                 struct be_rx_compl_info *rxcp)
2315 {
2316         struct be_adapter *adapter = rxo->adapter;
2317         struct net_device *netdev = adapter->netdev;
2318         struct sk_buff *skb;
2319
2320         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2321         if (unlikely(!skb)) {
2322                 rx_stats(rxo)->rx_drops_no_skbs++;
2323                 be_rx_compl_discard(rxo, rxcp);
2324                 return;
2325         }
2326
2327         skb_fill_rx_data(rxo, skb, rxcp);
2328
2329         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2330                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2331         else
2332                 skb_checksum_none_assert(skb);
2333
2334         skb->protocol = eth_type_trans(skb, netdev);
2335         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2336         if (netdev->features & NETIF_F_RXHASH)
2337                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2338
2339         skb->csum_level = rxcp->tunneled;
2340         skb_mark_napi_id(skb, napi);
2341
2342         if (rxcp->vlanf)
2343                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2344
2345         netif_receive_skb(skb);
2346 }
2347
2348 /* Process the RX completion indicated by rxcp when GRO is enabled */
2349 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2350                                     struct napi_struct *napi,
2351                                     struct be_rx_compl_info *rxcp)
2352 {
2353         struct be_adapter *adapter = rxo->adapter;
2354         struct be_rx_page_info *page_info;
2355         struct sk_buff *skb = NULL;
2356         u16 remaining, curr_frag_len;
2357         u16 i, j;
2358
2359         skb = napi_get_frags(napi);
2360         if (!skb) {
2361                 be_rx_compl_discard(rxo, rxcp);
2362                 return;
2363         }
2364
2365         remaining = rxcp->pkt_size;
2366         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2367                 page_info = get_rx_page_info(rxo);
2368
2369                 curr_frag_len = min(remaining, rx_frag_size);
2370
2371                 /* Coalesce all frags from the same physical page in one slot */
2372                 if (i == 0 || page_info->page_offset == 0) {
2373                         /* First frag or Fresh page */
2374                         j++;
2375                         skb_frag_set_page(skb, j, page_info->page);
2376                         skb_shinfo(skb)->frags[j].page_offset =
2377                                                         page_info->page_offset;
2378                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2379                 } else {
2380                         put_page(page_info->page);
2381                 }
2382                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2383                 skb->truesize += rx_frag_size;
2384                 remaining -= curr_frag_len;
2385                 memset(page_info, 0, sizeof(*page_info));
2386         }
2387         BUG_ON(j > MAX_SKB_FRAGS);
2388
2389         skb_shinfo(skb)->nr_frags = j + 1;
2390         skb->len = rxcp->pkt_size;
2391         skb->data_len = rxcp->pkt_size;
2392         skb->ip_summed = CHECKSUM_UNNECESSARY;
2393         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2394         if (adapter->netdev->features & NETIF_F_RXHASH)
2395                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2396
2397         skb->csum_level = rxcp->tunneled;
2398
2399         if (rxcp->vlanf)
2400                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2401
2402         napi_gro_frags(napi);
2403 }
2404
2405 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2406                                  struct be_rx_compl_info *rxcp)
2407 {
2408         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2409         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2410         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2411         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2412         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2413         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2414         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2415         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2416         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2417         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2418         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2419         if (rxcp->vlanf) {
2420                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2421                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2422         }
2423         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2424         rxcp->tunneled =
2425                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2426 }
2427
2428 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2429                                  struct be_rx_compl_info *rxcp)
2430 {
2431         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2432         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2433         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2434         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2435         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2436         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2437         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2438         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2439         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2440         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2441         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2442         if (rxcp->vlanf) {
2443                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2444                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2445         }
2446         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2447         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2448 }
2449
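/* Return the next valid RX completion from the CQ (or NULL if none is
 * pending), parsed from the v0/v1 HW layout into the chip-agnostic
 * be_rx_compl_info form.
 */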
2450 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2451 {
2452         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2453         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2454         struct be_adapter *adapter = rxo->adapter;
2455
2456         /* For checking the valid bit it is OK to use either definition, as the
2457          * valid bit is at the same position in both v0 and v1 Rx compl */
2458         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2459                 return NULL;
2460
2461         rmb();
2462         be_dws_le_to_cpu(compl, sizeof(*compl));
2463
2464         if (adapter->be3_native)
2465                 be_parse_rx_compl_v1(compl, rxcp);
2466         else
2467                 be_parse_rx_compl_v0(compl, rxcp);
2468
2469         if (rxcp->ip_frag)
2470                 rxcp->l4_csum = 0;
2471
2472         if (rxcp->vlanf) {
2473                 /* In QNQ modes, if qnq bit is not set, then the packet was
2474                  * tagged only with the transparent outer vlan-tag and must
2475                  * not be treated as a vlan packet by host
2476                  */
2477                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2478                         rxcp->vlanf = 0;
2479
2480                 if (!lancer_chip(adapter))
2481                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2482
2483                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2484                     !test_bit(rxcp->vlan_tag, adapter->vids))
2485                         rxcp->vlanf = 0;
2486         }
2487
2488         /* As the compl has been parsed, reset it; we won't touch it again */
2489         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2490
2491         queue_tail_inc(&rxo->cq);
2492         return rxcp;
2493 }
2494
2495 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2496 {
2497         u32 order = get_order(size);
2498
2499         if (order > 0)
2500                 gfp |= __GFP_COMP;
2501         return  alloc_pages(gfp, order);
2502 }
2503
2504 /*
2505  * Allocate a page, split it into fragments of size rx_frag_size and post them
2506  * as receive buffers to BE
2507  */
2508 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2509 {
2510         struct be_adapter *adapter = rxo->adapter;
2511         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2512         struct be_queue_info *rxq = &rxo->q;
2513         struct page *pagep = NULL;
2514         struct device *dev = &adapter->pdev->dev;
2515         struct be_eth_rx_d *rxd;
2516         u64 page_dmaaddr = 0, frag_dmaaddr;
2517         u32 posted, page_offset = 0, notify = 0;
2518
2519         page_info = &rxo->page_info_tbl[rxq->head];
2520         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2521                 if (!pagep) {
2522                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2523                         if (unlikely(!pagep)) {
2524                                 rx_stats(rxo)->rx_post_fail++;
2525                                 break;
2526                         }
2527                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2528                                                     adapter->big_page_size,
2529                                                     DMA_FROM_DEVICE);
2530                         if (dma_mapping_error(dev, page_dmaaddr)) {
2531                                 put_page(pagep);
2532                                 pagep = NULL;
2533                                 adapter->drv_stats.dma_map_errors++;
2534                                 break;
2535                         }
2536                         page_offset = 0;
2537                 } else {
2538                         get_page(pagep);
2539                         page_offset += rx_frag_size;
2540                 }
2541                 page_info->page_offset = page_offset;
2542                 page_info->page = pagep;
2543
2544                 rxd = queue_head_node(rxq);
2545                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2546                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2547                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2548
2549                 /* Any space left in the current big page for another frag? */
2550                 if ((page_offset + rx_frag_size + rx_frag_size) >
2551                                         adapter->big_page_size) {
2552                         pagep = NULL;
2553                         page_info->last_frag = true;
2554                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2555                 } else {
2556                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2557                 }
2558
2559                 prev_page_info = page_info;
2560                 queue_head_inc(rxq);
2561                 page_info = &rxo->page_info_tbl[rxq->head];
2562         }
2563
2564         /* Mark the last frag of a page when we break out of the above loop
2565          * with no more slots available in the RXQ
2566          */
2567         if (pagep) {
2568                 prev_page_info->last_frag = true;
2569                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2570         }
2571
2572         if (posted) {
2573                 atomic_add(posted, &rxq->used);
2574                 if (rxo->rx_post_starved)
2575                         rxo->rx_post_starved = false;
2576                 do {
2577                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2578                         be_rxq_notify(adapter, rxq->id, notify);
2579                         posted -= notify;
2580                 } while (posted);
2581         } else if (atomic_read(&rxq->used) == 0) {
2582                 /* Let be_worker replenish when memory is available */
2583                 rxo->rx_post_starved = true;
2584         }
2585 }
2586
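/* Return the next valid TX completion from the CQ, or NULL if none is
 * pending. The completion entry is invalidated before the CQ tail is
 * advanced.
 */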
2587 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2588 {
2589         struct be_queue_info *tx_cq = &txo->cq;
2590         struct be_tx_compl_info *txcp = &txo->txcp;
2591         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2592
2593         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2594                 return NULL;
2595
2596         /* Ensure load ordering of valid bit dword and other dwords below */
2597         rmb();
2598         be_dws_le_to_cpu(compl, sizeof(*compl));
2599
2600         txcp->status = GET_TX_COMPL_BITS(status, compl);
2601         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2602
2603         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2604         queue_tail_inc(tx_cq);
2605         return txcp;
2606 }
2607
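/* Walk the TXQ from its tail up to last_index, unmapping every WRB and
 * freeing the completed skbs. Returns the number of WRBs processed so the
 * caller can adjust the queue's used count.
 */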
2608 static u16 be_tx_compl_process(struct be_adapter *adapter,
2609                                struct be_tx_obj *txo, u16 last_index)
2610 {
2611         struct sk_buff **sent_skbs = txo->sent_skb_list;
2612         struct be_queue_info *txq = &txo->q;
2613         struct sk_buff *skb = NULL;
2614         bool unmap_skb_hdr = false;
2615         struct be_eth_wrb *wrb;
2616         u16 num_wrbs = 0;
2617         u32 frag_index;
2618
2619         do {
2620                 if (sent_skbs[txq->tail]) {
2621                         /* Free skb from prev req */
2622                         if (skb)
2623                                 dev_consume_skb_any(skb);
2624                         skb = sent_skbs[txq->tail];
2625                         sent_skbs[txq->tail] = NULL;
2626                         queue_tail_inc(txq);  /* skip hdr wrb */
2627                         num_wrbs++;
2628                         unmap_skb_hdr = true;
2629                 }
2630                 wrb = queue_tail_node(txq);
2631                 frag_index = txq->tail;
2632                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2633                               (unmap_skb_hdr && skb_headlen(skb)));
2634                 unmap_skb_hdr = false;
2635                 queue_tail_inc(txq);
2636                 num_wrbs++;
2637         } while (frag_index != last_index);
2638         dev_consume_skb_any(skb);
2639
2640         return num_wrbs;
2641 }
2642
2643 /* Consume all pending entries in the event queue and return their count */
2644 static inline int events_get(struct be_eq_obj *eqo)
2645 {
2646         struct be_eq_entry *eqe;
2647         int num = 0;
2648
2649         do {
2650                 eqe = queue_tail_node(&eqo->q);
2651                 if (eqe->evt == 0)
2652                         break;
2653
2654                 rmb();
2655                 eqe->evt = 0;
2656                 num++;
2657                 queue_tail_inc(&eqo->q);
2658         } while (true);
2659
2660         return num;
2661 }
2662
2663 /* Leaves the EQ in a disarmed state */
2664 static void be_eq_clean(struct be_eq_obj *eqo)
2665 {
2666         int num = events_get(eqo);
2667
2668         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2669 }
2670
2671 /* Free posted rx buffers that were not used */
2672 static void be_rxq_clean(struct be_rx_obj *rxo)
2673 {
2674         struct be_queue_info *rxq = &rxo->q;
2675         struct be_rx_page_info *page_info;
2676
2677         while (atomic_read(&rxq->used) > 0) {
2678                 page_info = get_rx_page_info(rxo);
2679                 put_page(page_info->page);
2680                 memset(page_info, 0, sizeof(*page_info));
2681         }
2682         BUG_ON(atomic_read(&rxq->used));
2683         rxq->tail = 0;
2684         rxq->head = 0;
2685 }
2686
2687 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2688 {
2689         struct be_queue_info *rx_cq = &rxo->cq;
2690         struct be_rx_compl_info *rxcp;
2691         struct be_adapter *adapter = rxo->adapter;
2692         int flush_wait = 0;
2693
2694         /* Consume pending rx completions.
2695          * Wait for the flush completion (identified by zero num_rcvd)
2696          * to arrive. Notify CQ even when there are no more CQ entries
2697          * for HW to flush partially coalesced CQ entries.
2698          * In Lancer, there is no need to wait for flush compl.
2699          */
2700         for (;;) {
2701                 rxcp = be_rx_compl_get(rxo);
2702                 if (!rxcp) {
2703                         if (lancer_chip(adapter))
2704                                 break;
2705
2706                         if (flush_wait++ > 50 ||
2707                             be_check_error(adapter,
2708                                            BE_ERROR_HW)) {
2709                                 dev_warn(&adapter->pdev->dev,
2710                                          "did not receive flush compl\n");
2711                                 break;
2712                         }
2713                         be_cq_notify(adapter, rx_cq->id, true, 0);
2714                         mdelay(1);
2715                 } else {
2716                         be_rx_compl_discard(rxo, rxcp);
2717                         be_cq_notify(adapter, rx_cq->id, false, 1);
2718                         if (rxcp->num_rcvd == 0)
2719                                 break;
2720                 }
2721         }
2722
2723         /* After cleanup, leave the CQ in unarmed state */
2724         be_cq_notify(adapter, rx_cq->id, false, 0);
2725 }
2726
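/* Drain all pending TX completions at teardown time and reclaim any WRBs
 * that were enqueued but never notified to the hardware, resetting the TXQ
 * indices for those requests.
 */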
2727 static void be_tx_compl_clean(struct be_adapter *adapter)
2728 {
2729         struct device *dev = &adapter->pdev->dev;
2730         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2731         struct be_tx_compl_info *txcp;
2732         struct be_queue_info *txq;
2733         u32 end_idx, notified_idx;
2734         struct be_tx_obj *txo;
2735         int i, pending_txqs;
2736
2737         /* Stop polling for compls when HW has been silent for 10ms */
2738         do {
2739                 pending_txqs = adapter->num_tx_qs;
2740
2741                 for_all_tx_queues(adapter, txo, i) {
2742                         cmpl = 0;
2743                         num_wrbs = 0;
2744                         txq = &txo->q;
2745                         while ((txcp = be_tx_compl_get(txo))) {
2746                                 num_wrbs +=
2747                                         be_tx_compl_process(adapter, txo,
2748                                                             txcp->end_index);
2749                                 cmpl++;
2750                         }
2751                         if (cmpl) {
2752                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2753                                 atomic_sub(num_wrbs, &txq->used);
2754                                 timeo = 0;
2755                         }
2756                         if (!be_is_tx_compl_pending(txo))
2757                                 pending_txqs--;
2758                 }
2759
2760                 if (pending_txqs == 0 || ++timeo > 10 ||
2761                     be_check_error(adapter, BE_ERROR_HW))
2762                         break;
2763
2764                 mdelay(1);
2765         } while (true);
2766
2767         /* Free enqueued TX that was never notified to HW */
2768         for_all_tx_queues(adapter, txo, i) {
2769                 txq = &txo->q;
2770
2771                 if (atomic_read(&txq->used)) {
2772                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2773                                  i, atomic_read(&txq->used));
2774                         notified_idx = txq->tail;
2775                         end_idx = txq->tail;
2776                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2777                                   txq->len);
2778                         /* Use the tx-compl process logic to handle requests
2779                          * that were not sent to the HW.
2780                          */
2781                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2782                         atomic_sub(num_wrbs, &txq->used);
2783                         BUG_ON(atomic_read(&txq->used));
2784                         txo->pend_wrb_cnt = 0;
2785                         /* Since hw was never notified of these requests,
2786                          * reset TXQ indices
2787                          */
2788                         txq->head = notified_idx;
2789                         txq->tail = notified_idx;
2790                 }
2791         }
2792 }
2793
2794 static void be_evt_queues_destroy(struct be_adapter *adapter)
2795 {
2796         struct be_eq_obj *eqo;
2797         int i;
2798
2799         for_all_evt_queues(adapter, eqo, i) {
2800                 if (eqo->q.created) {
2801                         be_eq_clean(eqo);
2802                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2803                         netif_napi_del(&eqo->napi);
2804                         free_cpumask_var(eqo->affinity_mask);
2805                 }
2806                 be_queue_free(adapter, &eqo->q);
2807         }
2808 }
2809
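/* Allocate and create the event queues, each with adaptive coalescing
 * enabled, a NAPI context and a CPU affinity hint spread across the
 * device's local NUMA node.
 */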
2810 static int be_evt_queues_create(struct be_adapter *adapter)
2811 {
2812         struct be_queue_info *eq;
2813         struct be_eq_obj *eqo;
2814         struct be_aic_obj *aic;
2815         int i, rc;
2816
2817         /* need enough EQs to service both RX and TX queues */
2818         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2819                                     max(adapter->cfg_num_rx_irqs,
2820                                         adapter->cfg_num_tx_irqs));
2821
2822         for_all_evt_queues(adapter, eqo, i) {
2823                 int numa_node = dev_to_node(&adapter->pdev->dev);
2824
2825                 aic = &adapter->aic_obj[i];
2826                 eqo->adapter = adapter;
2827                 eqo->idx = i;
2828                 aic->max_eqd = BE_MAX_EQD;
2829                 aic->enable = true;
2830
2831                 eq = &eqo->q;
2832                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2833                                     sizeof(struct be_eq_entry));
2834                 if (rc)
2835                         return rc;
2836
2837                 rc = be_cmd_eq_create(adapter, eqo);
2838                 if (rc)
2839                         return rc;
2840
2841                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2842                         return -ENOMEM;
2843                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2844                                 eqo->affinity_mask);
2845                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2846                                BE_NAPI_WEIGHT);
2847         }
2848         return 0;
2849 }
2850
2851 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2852 {
2853         struct be_queue_info *q;
2854
2855         q = &adapter->mcc_obj.q;
2856         if (q->created)
2857                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2858         be_queue_free(adapter, q);
2859
2860         q = &adapter->mcc_obj.cq;
2861         if (q->created)
2862                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2863         be_queue_free(adapter, q);
2864 }
2865
2866 /* Must be called only after TX qs are created as MCC shares TX EQ */
2867 static int be_mcc_queues_create(struct be_adapter *adapter)
2868 {
2869         struct be_queue_info *q, *cq;
2870
2871         cq = &adapter->mcc_obj.cq;
2872         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2873                            sizeof(struct be_mcc_compl)))
2874                 goto err;
2875
2876         /* Use the default EQ for MCC completions */
2877         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2878                 goto mcc_cq_free;
2879
2880         q = &adapter->mcc_obj.q;
2881         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2882                 goto mcc_cq_destroy;
2883
2884         if (be_cmd_mccq_create(adapter, q, cq))
2885                 goto mcc_q_free;
2886
2887         return 0;
2888
2889 mcc_q_free:
2890         be_queue_free(adapter, q);
2891 mcc_cq_destroy:
2892         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2893 mcc_cq_free:
2894         be_queue_free(adapter, cq);
2895 err:
2896         return -1;
2897 }
2898
2899 static void be_tx_queues_destroy(struct be_adapter *adapter)
2900 {
2901         struct be_queue_info *q;
2902         struct be_tx_obj *txo;
2903         u8 i;
2904
2905         for_all_tx_queues(adapter, txo, i) {
2906                 q = &txo->q;
2907                 if (q->created)
2908                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2909                 be_queue_free(adapter, q);
2910
2911                 q = &txo->cq;
2912                 if (q->created)
2913                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2914                 be_queue_free(adapter, q);
2915         }
2916 }
2917
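/* Create a completion queue and a TXQ per TX queue. TX CQs are spread
 * round-robin across the available EQs (so multiple TXQs may share an EQ),
 * and XPS maps TX queues to their EQ's CPU affinity mask.
 */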
2918 static int be_tx_qs_create(struct be_adapter *adapter)
2919 {
2920         struct be_queue_info *cq;
2921         struct be_tx_obj *txo;
2922         struct be_eq_obj *eqo;
2923         int status, i;
2924
2925         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2926
2927         for_all_tx_queues(adapter, txo, i) {
2928                 cq = &txo->cq;
2929                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2930                                         sizeof(struct be_eth_tx_compl));
2931                 if (status)
2932                         return status;
2933
2934                 u64_stats_init(&txo->stats.sync);
2935                 u64_stats_init(&txo->stats.sync_compl);
2936
2937                 /* If num_evt_qs is less than num_tx_qs, then more than
2938                  * one txq shares an eq
2939                  */
2940                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2941                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2942                 if (status)
2943                         return status;
2944
2945                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2946                                         sizeof(struct be_eth_wrb));
2947                 if (status)
2948                         return status;
2949
2950                 status = be_cmd_txq_create(adapter, txo);
2951                 if (status)
2952                         return status;
2953
2954                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2955                                     eqo->idx);
2956         }
2957
2958         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2959                  adapter->num_tx_qs);
2960         return 0;
2961 }
2962
2963 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2964 {
2965         struct be_queue_info *q;
2966         struct be_rx_obj *rxo;
2967         int i;
2968
2969         for_all_rx_queues(adapter, rxo, i) {
2970                 q = &rxo->cq;
2971                 if (q->created)
2972                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2973                 be_queue_free(adapter, q);
2974         }
2975 }
2976
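/* Work out how many RX queues to use (RSS rings plus an optional default
 * RXQ, with a minimum of one) and create a completion queue for each,
 * spreading the CQs round-robin across the EQs.
 */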
2977 static int be_rx_cqs_create(struct be_adapter *adapter)
2978 {
2979         struct be_queue_info *eq, *cq;
2980         struct be_rx_obj *rxo;
2981         int rc, i;
2982
2983         adapter->num_rss_qs =
2984                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2985
2986         /* We'll use RSS only if at least 2 RSS rings are supported. */
2987         if (adapter->num_rss_qs < 2)
2988                 adapter->num_rss_qs = 0;
2989
2990         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2991
2992         /* When the interface is not capable of RSS rings (and there is no
2993          * need to create a default RXQ) we'll still need one RXQ
2994          */
2995         if (adapter->num_rx_qs == 0)
2996                 adapter->num_rx_qs = 1;
2997
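        /* RX fragments are carved out of allocations of big_page_size bytes;
         * e.g. with the default rx_frag_size of 2048 and 4K pages,
         * get_order(2048) is 0 and big_page_size works out to PAGE_SIZE.
         */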
2998         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2999         for_all_rx_queues(adapter, rxo, i) {
3000                 rxo->adapter = adapter;
3001                 cq = &rxo->cq;
3002                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3003                                     sizeof(struct be_eth_rx_compl));
3004                 if (rc)
3005                         return rc;
3006
3007                 u64_stats_init(&rxo->stats.sync);
3008                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3009                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3010                 if (rc)
3011                         return rc;
3012         }
3013
3014         dev_info(&adapter->pdev->dev,
3015                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3016         return 0;
3017 }
3018
3019 static irqreturn_t be_intx(int irq, void *dev)
3020 {
3021         struct be_eq_obj *eqo = dev;
3022         struct be_adapter *adapter = eqo->adapter;
3023         int num_evts = 0;
3024
3025         /* IRQ is not expected when NAPI is scheduled as the EQ
3026          * will not be armed.
3027          * But, this can happen on Lancer INTx where it takes
3028          * a while to de-assert INTx or in BE2 where occasionally
3029          * an interrupt may be raised even when EQ is unarmed.
3030          * If NAPI is already scheduled, then counting & notifying
3031          * events will orphan them.
3032          */
3033         if (napi_schedule_prep(&eqo->napi)) {
3034                 num_evts = events_get(eqo);
3035                 __napi_schedule(&eqo->napi);
3036                 if (num_evts)
3037                         eqo->spurious_intr = 0;
3038         }
3039         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3040
3041         /* Return IRQ_HANDLED only for the first spurious intr
3042          * after a valid intr to stop the kernel from branding
3043          * this irq as a bad one!
3044          */
3045         if (num_evts || eqo->spurious_intr++ == 0)
3046                 return IRQ_HANDLED;
3047         else
3048                 return IRQ_NONE;
3049 }
3050
3051 static irqreturn_t be_msix(int irq, void *dev)
3052 {
3053         struct be_eq_obj *eqo = dev;
3054
3055         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3056         napi_schedule(&eqo->napi);
3057         return IRQ_HANDLED;
3058 }
3059
3060 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3061 {
3062         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3063 }
3064
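/* NAPI RX poll helper: drains up to 'budget' completions from the RX CQ.
 * Flush completions (no data) are skipped, partial-DMA completions and
 * (on BE) packets mis-delivered by imperfect promiscuous filtering are
 * discarded, and everything else goes to GRO or the regular receive path.
 * The CQ is then notified and the RXQ replenished unless it is in the
 * post-starved state.
 */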
3065 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3066                          int budget)
3067 {
3068         struct be_adapter *adapter = rxo->adapter;
3069         struct be_queue_info *rx_cq = &rxo->cq;
3070         struct be_rx_compl_info *rxcp;
3071         u32 work_done;
3072         u32 frags_consumed = 0;
3073
3074         for (work_done = 0; work_done < budget; work_done++) {
3075                 rxcp = be_rx_compl_get(rxo);
3076                 if (!rxcp)
3077                         break;
3078
3079                 /* Is it a flush compl that has no data */
3080                 if (unlikely(rxcp->num_rcvd == 0))
3081                         goto loop_continue;
3082
3083                 /* Discard compl with partial DMA Lancer B0 */
3084                 if (unlikely(!rxcp->pkt_size)) {
3085                         be_rx_compl_discard(rxo, rxcp);
3086                         goto loop_continue;
3087                 }
3088
3089                 /* On BE drop pkts that arrive due to imperfect filtering in
3090                  * promiscuous mode on some SKUs
3091                  */
3092                 if (unlikely(rxcp->port != adapter->port_num &&
3093                              !lancer_chip(adapter))) {
3094                         be_rx_compl_discard(rxo, rxcp);
3095                         goto loop_continue;
3096                 }
3097
3098                 if (do_gro(rxcp))
3099                         be_rx_compl_process_gro(rxo, napi, rxcp);
3100                 else
3101                         be_rx_compl_process(rxo, napi, rxcp);
3102
3103 loop_continue:
3104                 frags_consumed += rxcp->num_rcvd;
3105                 be_rx_stats_update(rxo, rxcp);
3106         }
3107
3108         if (work_done) {
3109                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3110
3111                 /* When an rx-obj gets into post_starved state, just
3112                  * let be_worker do the posting.
3113                  */
3114                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3115                     !rxo->rx_post_starved)
3116                         be_post_rx_frags(rxo, GFP_ATOMIC,
3117                                          max_t(u32, MAX_RX_POST,
3118                                                frags_consumed));
3119         }
3120
3121         return work_done;
3122 }
3123
3124 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3125 {
3126         switch (status) {
3127         case BE_TX_COMP_HDR_PARSE_ERR:
3128                 tx_stats(txo)->tx_hdr_parse_err++;
3129                 break;
3130         case BE_TX_COMP_NDMA_ERR:
3131                 tx_stats(txo)->tx_dma_err++;
3132                 break;
3133         case BE_TX_COMP_ACL_ERR:
3134                 tx_stats(txo)->tx_spoof_check_err++;
3135                 break;
3136         }
3137 }
3138
3139 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3140 {
3141         switch (status) {
3142         case LANCER_TX_COMP_LSO_ERR:
3143                 tx_stats(txo)->tx_tso_err++;
3144                 break;
3145         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3146         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3147                 tx_stats(txo)->tx_spoof_check_err++;
3148                 break;
3149         case LANCER_TX_COMP_QINQ_ERR:
3150                 tx_stats(txo)->tx_qinq_err++;
3151                 break;
3152         case LANCER_TX_COMP_PARITY_ERR:
3153                 tx_stats(txo)->tx_internal_parity_err++;
3154                 break;
3155         case LANCER_TX_COMP_DMA_ERR:
3156                 tx_stats(txo)->tx_dma_err++;
3157                 break;
3158         }
3159 }
3160
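/* Reap TX completions for one TX queue: free the completed wrbs, account
 * per-completion errors, notify the CQ and wake the netdev subqueue if it
 * was stopped for lack of wrb space.
 */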
3161 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3162                           int idx)
3163 {
3164         int num_wrbs = 0, work_done = 0;
3165         struct be_tx_compl_info *txcp;
3166
3167         while ((txcp = be_tx_compl_get(txo))) {
3168                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3169                 work_done++;
3170
3171                 if (txcp->status) {
3172                         if (lancer_chip(adapter))
3173                                 lancer_update_tx_err(txo, txcp->status);
3174                         else
3175                                 be_update_tx_err(txo, txcp->status);
3176                 }
3177         }
3178
3179         if (work_done) {
3180                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3181                 atomic_sub(num_wrbs, &txo->q.used);
3182
3183                 /* As Tx wrbs have been freed up, wake up netdev queue
3184                  * if it was stopped due to lack of tx wrbs.  */
3185                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3186                     be_can_txq_wake(txo)) {
3187                         netif_wake_subqueue(adapter->netdev, idx);
3188                 }
3189
3190                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3191                 tx_stats(txo)->tx_compl += work_done;
3192                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3193         }
3194 }
3195
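/* NAPI handler shared by the TX, RX and MCC processing hanging off one EQ.
 * When the work done stays below the budget, NAPI is completed and the EQ
 * is re-armed (on Skyhawk with a delay-multiplier encoding); otherwise the
 * events are only counted and cleared so polling can continue.
 */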
3196 int be_poll(struct napi_struct *napi, int budget)
3197 {
3198         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3199         struct be_adapter *adapter = eqo->adapter;
3200         int max_work = 0, work, i, num_evts;
3201         struct be_rx_obj *rxo;
3202         struct be_tx_obj *txo;
3203         u32 mult_enc = 0;
3204
3205         num_evts = events_get(eqo);
3206
3207         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3208                 be_process_tx(adapter, txo, i);
3209
3210         /* This loop will iterate twice for EQ0 in which
3211          * completions of the last RXQ (default one) are also processed.
3212          * For other EQs the loop iterates only once
3213          */
3214         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3215                 work = be_process_rx(rxo, napi, budget);
3216                 max_work = max(work, max_work);
3217         }
3218
3219         if (is_mcc_eqo(eqo))
3220                 be_process_mcc(adapter);
3221
3222         if (max_work < budget) {
3223                 napi_complete_done(napi, max_work);
3224
3225                 /* Skyhawk's EQ_DB can encode an extra delay, via a delay
3226                  * multiplier, between re-arming the EQ and the next interrupt
3227                  */
3228                 if (skyhawk_chip(adapter))
3229                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3230
3231                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3232                              mult_enc);
3233         } else {
3234                 /* As we'll continue in polling mode, count and clear events */
3235                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3236         }
3237         return max_work;
3238 }
3239
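/* Poll the adapter for unrecoverable errors: on Lancer via the SLIPORT
 * status/error registers, on other chips via the UE status CSRs. Decoded
 * error bits are logged; the UE error state is latched only where the
 * hardware reports it reliably (Lancer and Skyhawk).
 */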
3240 void be_detect_error(struct be_adapter *adapter)
3241 {
3242         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3243         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3244         u32 i;
3245         struct device *dev = &adapter->pdev->dev;
3246
3247         if (be_check_error(adapter, BE_ERROR_HW))
3248                 return;
3249
3250         if (lancer_chip(adapter)) {
3251                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3252                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3253                         be_set_error(adapter, BE_ERROR_UE);
3254                         sliport_err1 = ioread32(adapter->db +
3255                                                 SLIPORT_ERROR1_OFFSET);
3256                         sliport_err2 = ioread32(adapter->db +
3257                                                 SLIPORT_ERROR2_OFFSET);
3258                         /* Do not log error messages if it's a FW reset */
3259                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3260                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3261                                 dev_info(dev, "Firmware update in progress\n");
3262                         } else {
3263                                 dev_err(dev, "Error detected in the card\n");
3264                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3265                                         sliport_status);
3266                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3267                                         sliport_err1);
3268                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3269                                         sliport_err2);
3270                         }
3271                 }
3272         } else {
3273                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3274                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3275                 ue_lo_mask = ioread32(adapter->pcicfg +
3276                                       PCICFG_UE_STATUS_LOW_MASK);
3277                 ue_hi_mask = ioread32(adapter->pcicfg +
3278                                       PCICFG_UE_STATUS_HI_MASK);
3279
3280                 ue_lo = (ue_lo & ~ue_lo_mask);
3281                 ue_hi = (ue_hi & ~ue_hi_mask);
3282
3283                 /* On certain platforms BE hardware can indicate spurious UEs.
3284                  * In case of a real UE the HW anyway stops working completely,
3285                  * so do not set hw_error based on UE detection alone.
3286                  */
3287
3288                 if (ue_lo || ue_hi) {
3289                         dev_err(dev, "Error detected in the adapter\n");
3290                         if (skyhawk_chip(adapter))
3291                                 be_set_error(adapter, BE_ERROR_UE);
3292
3293                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3294                                 if (ue_lo & 1)
3295                                         dev_err(dev, "UE: %s bit set\n",
3296                                                 ue_status_low_desc[i]);
3297                         }
3298                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3299                                 if (ue_hi & 1)
3300                                         dev_err(dev, "UE: %s bit set\n",
3301                                                 ue_status_hi_desc[i]);
3302                         }
3303                 }
3304         }
3305 }
3306
3307 static void be_msix_disable(struct be_adapter *adapter)
3308 {
3309         if (msix_enabled(adapter)) {
3310                 pci_disable_msix(adapter->pdev);
3311                 adapter->num_msix_vec = 0;
3312                 adapter->num_msix_roce_vec = 0;
3313         }
3314 }
3315
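/* Request MSI-X vectors: enough for both NIC and RoCE when RoCE is
 * supported, otherwise max(RX IRQs, TX IRQs). If RoCE is supported and more
 * than the minimum was granted, half of the granted vectors are set aside
 * for RoCE. On failure the VF probe is aborted, since VFs cannot fall back
 * to INTx.
 */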
3316 static int be_msix_enable(struct be_adapter *adapter)
3317 {
3318         unsigned int i, max_roce_eqs;
3319         struct device *dev = &adapter->pdev->dev;
3320         int num_vec;
3321
3322         /* If RoCE is supported, program the max number of vectors that
3323          * could be used for NIC and RoCE; otherwise, program only the
3324          * number we'll use initially.
3325          */
3326         if (be_roce_supported(adapter)) {
3327                 max_roce_eqs =
3328                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3329                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3330                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3331         } else {
3332                 num_vec = max(adapter->cfg_num_rx_irqs,
3333                               adapter->cfg_num_tx_irqs);
3334         }
3335
3336         for (i = 0; i < num_vec; i++)
3337                 adapter->msix_entries[i].entry = i;
3338
3339         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3340                                         MIN_MSIX_VECTORS, num_vec);
3341         if (num_vec < 0)
3342                 goto fail;
3343
3344         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3345                 adapter->num_msix_roce_vec = num_vec / 2;
3346                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3347                          adapter->num_msix_roce_vec);
3348         }
3349
3350         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3351
3352         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3353                  adapter->num_msix_vec);
3354         return 0;
3355
3356 fail:
3357         dev_warn(dev, "MSIx enable failed\n");
3358
3359         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3360         if (be_virtfn(adapter))
3361                 return num_vec;
3362         return 0;
3363 }
3364
3365 static inline int be_msix_vec_get(struct be_adapter *adapter,
3366                                   struct be_eq_obj *eqo)
3367 {
3368         return adapter->msix_entries[eqo->msix_idx].vector;
3369 }
3370
3371 static int be_msix_register(struct be_adapter *adapter)
3372 {
3373         struct net_device *netdev = adapter->netdev;
3374         struct be_eq_obj *eqo;
3375         int status, i, vec;
3376
3377         for_all_evt_queues(adapter, eqo, i) {
3378                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3379                 vec = be_msix_vec_get(adapter, eqo);
3380                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3381                 if (status)
3382                         goto err_msix;
3383
3384                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3385         }
3386
3387         return 0;
3388 err_msix:
3389         for (i--; i >= 0; i--) {
3390                 eqo = &adapter->eq_obj[i];
3391                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3392         }
3393         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3394                  status);
3395         be_msix_disable(adapter);
3396         return status;
3397 }
3398
3399 static int be_irq_register(struct be_adapter *adapter)
3400 {
3401         struct net_device *netdev = adapter->netdev;
3402         int status;
3403
3404         if (msix_enabled(adapter)) {
3405                 status = be_msix_register(adapter);
3406                 if (status == 0)
3407                         goto done;
3408                 /* INTx is not supported for VF */
3409                 if (be_virtfn(adapter))
3410                         return status;
3411         }
3412
3413         /* INTx: only the first EQ is used */
3414         netdev->irq = adapter->pdev->irq;
3415         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3416                              &adapter->eq_obj[0]);
3417         if (status) {
3418                 dev_err(&adapter->pdev->dev,
3419                         "INTx request IRQ failed - err %d\n", status);
3420                 return status;
3421         }
3422 done:
3423         adapter->isr_registered = true;
3424         return 0;
3425 }
3426
3427 static void be_irq_unregister(struct be_adapter *adapter)
3428 {
3429         struct net_device *netdev = adapter->netdev;
3430         struct be_eq_obj *eqo;
3431         int i, vec;
3432
3433         if (!adapter->isr_registered)
3434                 return;
3435
3436         /* INTx */
3437         if (!msix_enabled(adapter)) {
3438                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3439                 goto done;
3440         }
3441
3442         /* MSIx */
3443         for_all_evt_queues(adapter, eqo, i) {
3444                 vec = be_msix_vec_get(adapter, eqo);
3445                 irq_set_affinity_hint(vec, NULL);
3446                 free_irq(vec, eqo);
3447         }
3448
3449 done:
3450         adapter->isr_registered = false;
3451 }
3452
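/* Destroy all RX queues. On Lancer an RXQ destroyed while out of buffers
 * can stall the hardware, so the CQ is drained and fresh buffers are posted
 * first if the queue has run dry. Any RSS configuration is also cleared.
 */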
3453 static void be_rx_qs_destroy(struct be_adapter *adapter)
3454 {
3455         struct rss_info *rss = &adapter->rss_info;
3456         struct be_queue_info *q;
3457         struct be_rx_obj *rxo;
3458         int i;
3459
3460         for_all_rx_queues(adapter, rxo, i) {
3461                 q = &rxo->q;
3462                 if (q->created) {
3463                         /* If RXQs are destroyed while in an "out of buffer"
3464                          * state, there is a possibility of an HW stall on
3465                          * Lancer. So, post 64 buffers to each queue to relieve
3466                          * the "out of buffer" condition.
3467                          * Make sure there's space in the RXQ before posting.
3468                          */
3469                         if (lancer_chip(adapter)) {
3470                                 be_rx_cq_clean(rxo);
3471                                 if (atomic_read(&q->used) == 0)
3472                                         be_post_rx_frags(rxo, GFP_KERNEL,
3473                                                          MAX_RX_POST);
3474                         }
3475
3476                         be_cmd_rxq_destroy(adapter, q);
3477                         be_rx_cq_clean(rxo);
3478                         be_rxq_clean(rxo);
3479                 }
3480                 be_queue_free(adapter, q);
3481         }
3482
3483         if (rss->rss_flags) {
3484                 rss->rss_flags = RSS_ENABLE_NONE;
3485                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3486                                   128, rss->rss_hkey);
3487         }
3488 }
3489
3490 static void be_disable_if_filters(struct be_adapter *adapter)
3491 {
3492         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3493         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3494             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3495                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3496                 eth_zero_addr(adapter->dev_mac);
3497         }
3498
3499         be_clear_uc_list(adapter);
3500         be_clear_mc_list(adapter);
3501
3502         /* The IFACE flags are enabled in the open path and cleared
3503          * in the close path. When a VF gets detached from the host and
3504          * assigned to a VM the following happens:
3505          *      - VF's IFACE flags get cleared in the detach path
3506          *      - IFACE create is issued by the VF in the attach path
3507          * Due to a bug in the BE3/Skyhawk-R FW
3508          * (Lancer FW doesn't have the bug), the IFACE capability flags
3509          * specified along with the IFACE create cmd issued by a VF are not
3510          * honoured by FW.  As a consequence, if a *new* driver
3511          * (that enables/disables IFACE flags in open/close)
3512          * is loaded in the host and an *old* driver is used by a VM/VF,
3513          * the IFACE gets created *without* the needed flags.
3514          * To avoid this, disable RX-filter flags only for Lancer.
3515          */
3516         if (lancer_chip(adapter)) {
3517                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3518                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3519         }
3520 }
3521
3522 static int be_close(struct net_device *netdev)
3523 {
3524         struct be_adapter *adapter = netdev_priv(netdev);
3525         struct be_eq_obj *eqo;
3526         int i;
3527
3528         /* This protection is needed as be_close() may be called even when the
3529          * adapter is in cleared state (after eeh perm failure)
3530          */
3531         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3532                 return 0;
3533
3534         /* Before attempting cleanup ensure all the pending cmds in the
3535          * config_wq have finished execution
3536          */
3537         flush_workqueue(be_wq);
3538
3539         be_disable_if_filters(adapter);
3540
3541         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3542                 for_all_evt_queues(adapter, eqo, i) {
3543                         napi_disable(&eqo->napi);
3544                 }
3545                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3546         }
3547
3548         be_async_mcc_disable(adapter);
3549
3550         /* Wait for all pending tx completions to arrive so that
3551          * all tx skbs are freed.
3552          */
3553         netif_tx_disable(netdev);
3554         be_tx_compl_clean(adapter);
3555
3556         be_rx_qs_destroy(adapter);
3557
3558         for_all_evt_queues(adapter, eqo, i) {
3559                 if (msix_enabled(adapter))
3560                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3561                 else
3562                         synchronize_irq(netdev->irq);
3563                 be_eq_clean(eqo);
3564         }
3565
3566         be_irq_unregister(adapter);
3567
3568         return 0;
3569 }
3570
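/* Allocate and create the default RXQ (when needed) and the RSS RXQs. When
 * more than one RX queue exists, the RSS indirection table is filled by
 * repeating the ring ids in order across its RSS_INDIR_TABLE_LEN entries
 * and a hash key from netdev_rss_key_fill() is programmed; otherwise RSS is
 * disabled. Each RXQ is then primed with RX_Q_LEN - 1 buffers so head never
 * equals tail (the queue-empty condition).
 */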
3571 static int be_rx_qs_create(struct be_adapter *adapter)
3572 {
3573         struct rss_info *rss = &adapter->rss_info;
3574         u8 rss_key[RSS_HASH_KEY_LEN];
3575         struct be_rx_obj *rxo;
3576         int rc, i, j;
3577
3578         for_all_rx_queues(adapter, rxo, i) {
3579                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3580                                     sizeof(struct be_eth_rx_d));
3581                 if (rc)
3582                         return rc;
3583         }
3584
3585         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3586                 rxo = default_rxo(adapter);
3587                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3588                                        rx_frag_size, adapter->if_handle,
3589                                        false, &rxo->rss_id);
3590                 if (rc)
3591                         return rc;
3592         }
3593
3594         for_all_rss_queues(adapter, rxo, i) {
3595                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3596                                        rx_frag_size, adapter->if_handle,
3597                                        true, &rxo->rss_id);
3598                 if (rc)
3599                         return rc;
3600         }
3601
3602         if (be_multi_rxq(adapter)) {
3603                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3604                         for_all_rss_queues(adapter, rxo, i) {
3605                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3606                                         break;
3607                                 rss->rsstable[j + i] = rxo->rss_id;
3608                                 rss->rss_queue[j + i] = i;
3609                         }
3610                 }
3611                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3612                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3613
3614                 if (!BEx_chip(adapter))
3615                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3616                                 RSS_ENABLE_UDP_IPV6;
3617
3618                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3619                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3620                                        RSS_INDIR_TABLE_LEN, rss_key);
3621                 if (rc) {
3622                         rss->rss_flags = RSS_ENABLE_NONE;
3623                         return rc;
3624                 }
3625
3626                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3627         } else {
3628                 /* Disable RSS, if only default RX Q is created */
3629                 rss->rss_flags = RSS_ENABLE_NONE;
3630         }
3631
3632
3633         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3634          * which is a queue empty condition
3635          */
3636         for_all_rx_queues(adapter, rxo, i)
3637                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3638
3639         return 0;
3640 }
3641
3642 static int be_enable_if_filters(struct be_adapter *adapter)
3643 {
3644         int status;
3645
3646         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3647         if (status)
3648                 return status;
3649
3650         /* This condition is normally true as the ->dev_mac is zeroed.
3651          * But on BE3 VFs the initial MAC is pre-programmed by PF and
3652          * subsequent be_dev_mac_add() can fail (after fresh boot)
3653          */
3654         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3655                 int old_pmac_id = -1;
3656
3657                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3658                 if (!is_zero_ether_addr(adapter->dev_mac))
3659                         old_pmac_id = adapter->pmac_id[0];
3660
3661                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3662                 if (status)
3663                         return status;
3664
3665                 /* Delete the old programmed MAC as we successfully programmed
3666                  * a new MAC
3667                  */
3668                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3669                         be_dev_mac_del(adapter, old_pmac_id);
3670
3671                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3672         }
3673
3674         if (adapter->vlans_added)
3675                 be_vid_config(adapter);
3676
3677         __be_set_rx_mode(adapter);
3678
3679         return 0;
3680 }
3681
3682 static int be_open(struct net_device *netdev)
3683 {
3684         struct be_adapter *adapter = netdev_priv(netdev);
3685         struct be_eq_obj *eqo;
3686         struct be_rx_obj *rxo;
3687         struct be_tx_obj *txo;
3688         u8 link_status;
3689         int status, i;
3690
3691         status = be_rx_qs_create(adapter);
3692         if (status)
3693                 goto err;
3694
3695         status = be_enable_if_filters(adapter);
3696         if (status)
3697                 goto err;
3698
3699         status = be_irq_register(adapter);
3700         if (status)
3701                 goto err;
3702
3703         for_all_rx_queues(adapter, rxo, i)
3704                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3705
3706         for_all_tx_queues(adapter, txo, i)
3707                 be_cq_notify(adapter, txo->cq.id, true, 0);
3708
3709         be_async_mcc_enable(adapter);
3710
3711         for_all_evt_queues(adapter, eqo, i) {
3712                 napi_enable(&eqo->napi);
3713                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3714         }
3715         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3716
3717         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3718         if (!status)
3719                 be_link_status_update(adapter, link_status);
3720
3721         netif_tx_start_all_queues(netdev);
3722         if (skyhawk_chip(adapter))
3723                 udp_tunnel_get_rx_info(netdev);
3724
3725         return 0;
3726 err:
3727         be_close(adapter->netdev);
3728         return -EIO;
3729 }
3730
3731 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3732 {
3733         u32 addr;
3734
3735         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3736
3737         mac[5] = (u8)(addr & 0xFF);
3738         mac[4] = (u8)((addr >> 8) & 0xFF);
3739         mac[3] = (u8)((addr >> 16) & 0xFF);
3740         /* Use the OUI from the current MAC address */
3741         memcpy(mac, adapter->netdev->dev_addr, 3);
3742 }
3743
3744 /*
3745  * Generate a seed MAC address from the PF MAC Address using jhash.
3746  * MAC addresses for VFs are assigned incrementally starting from the seed.
3747  * These addresses are programmed in the ASIC by the PF and the VF driver
3748  * queries for the MAC address during its probe.
3749  */
3750 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3751 {
3752         u32 vf;
3753         int status = 0;
3754         u8 mac[ETH_ALEN];
3755         struct be_vf_cfg *vf_cfg;
3756
3757         be_vf_eth_addr_generate(adapter, mac);
3758
3759         for_all_vfs(adapter, vf_cfg, vf) {
3760                 if (BEx_chip(adapter))
3761                         status = be_cmd_pmac_add(adapter, mac,
3762                                                  vf_cfg->if_handle,
3763                                                  &vf_cfg->pmac_id, vf + 1);
3764                 else
3765                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3766                                                 vf + 1);
3767
3768                 if (status)
3769                         dev_err(&adapter->pdev->dev,
3770                                 "Mac address assignment failed for VF %d\n",
3771                                 vf);
3772                 else
3773                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3774
3775                 mac[5] += 1;
3776         }
3777         return status;
3778 }
3779
3780 static int be_vfs_mac_query(struct be_adapter *adapter)
3781 {
3782         int status, vf;
3783         u8 mac[ETH_ALEN];
3784         struct be_vf_cfg *vf_cfg;
3785
3786         for_all_vfs(adapter, vf_cfg, vf) {
3787                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3788                                                mac, vf_cfg->if_handle,
3789                                                false, vf+1);
3790                 if (status)
3791                         return status;
3792                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3793         }
3794         return 0;
3795 }
3796
3797 static void be_vf_clear(struct be_adapter *adapter)
3798 {
3799         struct be_vf_cfg *vf_cfg;
3800         u32 vf;
3801
3802         if (pci_vfs_assigned(adapter->pdev)) {
3803                 dev_warn(&adapter->pdev->dev,
3804                          "VFs are assigned to VMs: not disabling VFs\n");
3805                 goto done;
3806         }
3807
3808         pci_disable_sriov(adapter->pdev);
3809
3810         for_all_vfs(adapter, vf_cfg, vf) {
3811                 if (BEx_chip(adapter))
3812                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3813                                         vf_cfg->pmac_id, vf + 1);
3814                 else
3815                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3816                                        vf + 1);
3817
3818                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3819         }
3820
3821         if (BE3_chip(adapter))
3822                 be_cmd_set_hsw_config(adapter, 0, 0,
3823                                       adapter->if_handle,
3824                                       PORT_FWD_TYPE_PASSTHRU, 0);
3825 done:
3826         kfree(adapter->vf_cfg);
3827         adapter->num_vfs = 0;
3828         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3829 }
3830
3831 static void be_clear_queues(struct be_adapter *adapter)
3832 {
3833         be_mcc_queues_destroy(adapter);
3834         be_rx_cqs_destroy(adapter);
3835         be_tx_queues_destroy(adapter);
3836         be_evt_queues_destroy(adapter);
3837 }
3838
3839 static void be_cancel_worker(struct be_adapter *adapter)
3840 {
3841         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3842                 cancel_delayed_work_sync(&adapter->work);
3843                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3844         }
3845 }
3846
3847 static void be_cancel_err_detection(struct be_adapter *adapter)
3848 {
3849         struct be_error_recovery *err_rec = &adapter->error_recovery;
3850
3851         if (!be_err_recovery_workq)
3852                 return;
3853
3854         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3855                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3856                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3857         }
3858 }
3859
3860 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3861 {
3862         struct net_device *netdev = adapter->netdev;
3863
3864         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3865                 be_cmd_manage_iface(adapter, adapter->if_handle,
3866                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3867
3868         if (adapter->vxlan_port)
3869                 be_cmd_set_vxlan_port(adapter, 0);
3870
3871         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3872         adapter->vxlan_port = 0;
3873
3874         netdev->hw_enc_features = 0;
3875         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3876         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3877 }
3878
3879 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3880                                 struct be_resources *vft_res)
3881 {
3882         struct be_resources res = adapter->pool_res;
3883         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3884         struct be_resources res_mod = {0};
3885         u16 num_vf_qs = 1;
3886
3887         /* Distribute the queue resources among the PF and its VFs */
3888         if (num_vfs) {
3889                 /* Divide the rx queues evenly among the VFs and the PF, capped
3890                  * at VF-EQ-count. Any remainder queues belong to the PF.
3891                  */
3892                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3893                                 res.max_rss_qs / (num_vfs + 1));
3894
3895                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3896                  * RSS Tables per port. Provide RSS on VFs, only if number of
3897                  * VFs requested is less than its PF Pool's RSS Tables limit.
3898                  */
3899                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3900                         num_vf_qs = 1;
3901         }
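        /* For example, with 32 RSS queues in the PF pool and num_vfs = 7,
         * res.max_rss_qs / (num_vfs + 1) = 4, so each function gets up to
         * 4 RX/RSS queues, subject to the SH_VF_MAX_NIC_EQS cap above.
         */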
3902
3903         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
3904          * which are modifiable using SET_PROFILE_CONFIG cmd.
3905          */
3906         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3907                                   RESOURCE_MODIFIABLE, 0);
3908
3909         /* If RSS IFACE capability flags are modifiable for a VF, set the
3910          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3911          * more than 1 RSSQ is available for a VF.
3912          * Otherwise, provision only 1 queue pair for VF.
3913          */
3914         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3915                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3916                 if (num_vf_qs > 1) {
3917                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3918                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3919                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3920                 } else {
3921                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3922                                              BE_IF_FLAGS_DEFQ_RSS);
3923                 }
3924         } else {
3925                 num_vf_qs = 1;
3926         }
3927
3928         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3929                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3930                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3931         }
3932
3933         vft_res->vf_if_cap_flags = vf_if_cap_flags;
3934         vft_res->max_rx_qs = num_vf_qs;
3935         vft_res->max_rss_qs = num_vf_qs;
3936         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3937         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3938
3939         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3940          * among the PF and its VFs, if the fields are changeable
3941          */
3942         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3943                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3944
3945         if (res_mod.max_vlans == FIELD_MODIFIABLE)
3946                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3947
3948         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
3949                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
3950
3951         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
3952                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
3953 }
3954
3955 static void be_if_destroy(struct be_adapter *adapter)
3956 {
3957         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
3958
3959         kfree(adapter->pmac_id);
3960         adapter->pmac_id = NULL;
3961
3962         kfree(adapter->mc_list);
3963         adapter->mc_list = NULL;
3964
3965         kfree(adapter->uc_list);
3966         adapter->uc_list = NULL;
3967 }
3968
3969 static int be_clear(struct be_adapter *adapter)
3970 {
3971         struct pci_dev *pdev = adapter->pdev;
3972         struct  be_resources vft_res = {0};
3973
3974         be_cancel_worker(adapter);
3975
3976         flush_workqueue(be_wq);
3977
3978         if (sriov_enabled(adapter))
3979                 be_vf_clear(adapter);
3980
3981         /* Re-configure FW to distribute resources evenly across max-supported
3982          * number of VFs, only when VFs are not already enabled.
3983          */
3984         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
3985             !pci_vfs_assigned(pdev)) {
3986                 be_calculate_vf_res(adapter,
3987                                     pci_sriov_get_totalvfs(pdev),
3988                                     &vft_res);
3989                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
3990                                         pci_sriov_get_totalvfs(pdev),
3991                                         &vft_res);
3992         }
3993
3994         be_disable_vxlan_offloads(adapter);
3995
3996         be_if_destroy(adapter);
3997
3998         be_clear_queues(adapter);
3999
4000         be_msix_disable(adapter);
4001         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4002         return 0;
4003 }
4004
4005 static int be_vfs_if_create(struct be_adapter *adapter)
4006 {
4007         struct be_resources res = {0};
4008         u32 cap_flags, en_flags, vf;
4009         struct be_vf_cfg *vf_cfg;
4010         int status;
4011
4012         /* If a FW profile exists, then cap_flags are updated */
4013         cap_flags = BE_VF_IF_EN_FLAGS;
4014
4015         for_all_vfs(adapter, vf_cfg, vf) {
4016                 if (!BE3_chip(adapter)) {
4017                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4018                                                            ACTIVE_PROFILE_TYPE,
4019                                                            RESOURCE_LIMITS,
4020                                                            vf + 1);
4021                         if (!status) {
4022                                 cap_flags = res.if_cap_flags;
4023                                 /* Prevent VFs from enabling VLAN promiscuous
4024                                  * mode
4025                                  */
4026                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4027                         }
4028                 }
4029
4030                 /* PF should enable IF flags during proxy if_create call */
4031                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4032                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4033                                           &vf_cfg->if_handle, vf + 1);
4034                 if (status)
4035                         return status;
4036         }
4037
4038         return 0;
4039 }
4040
4041 static int be_vf_setup_init(struct be_adapter *adapter)
4042 {
4043         struct be_vf_cfg *vf_cfg;
4044         int vf;
4045
4046         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4047                                   GFP_KERNEL);
4048         if (!adapter->vf_cfg)
4049                 return -ENOMEM;
4050
4051         for_all_vfs(adapter, vf_cfg, vf) {
4052                 vf_cfg->if_handle = -1;
4053                 vf_cfg->pmac_id = -1;
4054         }
4055         return 0;
4056 }
4057
4058 static int be_vf_setup(struct be_adapter *adapter)
4059 {
4060         struct device *dev = &adapter->pdev->dev;
4061         struct be_vf_cfg *vf_cfg;
4062         int status, old_vfs, vf;
4063         bool spoofchk;
4064
4065         old_vfs = pci_num_vf(adapter->pdev);
4066
4067         status = be_vf_setup_init(adapter);
4068         if (status)
4069                 goto err;
4070
4071         if (old_vfs) {
4072                 for_all_vfs(adapter, vf_cfg, vf) {
4073                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4074                         if (status)
4075                                 goto err;
4076                 }
4077
4078                 status = be_vfs_mac_query(adapter);
4079                 if (status)
4080                         goto err;
4081         } else {
4082                 status = be_vfs_if_create(adapter);
4083                 if (status)
4084                         goto err;
4085
4086                 status = be_vf_eth_addr_config(adapter);
4087                 if (status)
4088                         goto err;
4089         }
4090
4091         for_all_vfs(adapter, vf_cfg, vf) {
4092                 /* Allow VFs to program MAC/VLAN filters */
4093                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4094                                                   vf + 1);
4095                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4096                         status = be_cmd_set_fn_privileges(adapter,
4097                                                           vf_cfg->privileges |
4098                                                           BE_PRIV_FILTMGMT,
4099                                                           vf + 1);
4100                         if (!status) {
4101                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4102                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4103                                          vf);
4104                         }
4105                 }
4106
4107                 /* Allow full available bandwidth */
4108                 if (!old_vfs)
4109                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4110
4111                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4112                                                vf_cfg->if_handle, NULL,
4113                                                &spoofchk);
4114                 if (!status)
4115                         vf_cfg->spoofchk = spoofchk;
4116
4117                 if (!old_vfs) {
4118                         be_cmd_enable_vf(adapter, vf + 1);
4119                         be_cmd_set_logical_link_config(adapter,
4120                                                        IFLA_VF_LINK_STATE_AUTO,
4121                                                        vf+1);
4122                 }
4123         }
4124
4125         if (!old_vfs) {
4126                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4127                 if (status) {
4128                         dev_err(dev, "SRIOV enable failed\n");
4129                         adapter->num_vfs = 0;
4130                         goto err;
4131                 }
4132         }
4133
4134         if (BE3_chip(adapter)) {
4135                 /* On BE3, enable VEB only when SRIOV is enabled */
4136                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4137                                                adapter->if_handle,
4138                                                PORT_FWD_TYPE_VEB, 0);
4139                 if (status)
4140                         goto err;
4141         }
4142
4143         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4144         return 0;
4145 err:
4146         dev_err(dev, "VF setup failed\n");
4147         be_vf_clear(adapter);
4148         return status;
4149 }
4150
4151 /* Converting function_mode bits on BE3 to SH mc_type enums */
4152
4153 static u8 be_convert_mc_type(u32 function_mode)
4154 {
4155         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4156                 return vNIC1;
4157         else if (function_mode & QNQ_MODE)
4158                 return FLEX10;
4159         else if (function_mode & VNIC_MODE)
4160                 return vNIC2;
4161         else if (function_mode & UMC_ENABLED)
4162                 return UMC;
4163         else
4164                 return MC_NONE;
4165 }
4166
4167 /* On BE2/BE3 FW does not suggest the supported limits */
4168 static void BEx_get_resources(struct be_adapter *adapter,
4169                               struct be_resources *res)
4170 {
4171         bool use_sriov = adapter->num_vfs ? 1 : 0;
4172
4173         if (be_physfn(adapter))
4174                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4175         else
4176                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4177
4178         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4179
4180         if (be_is_mc(adapter)) {
4181                 /* Assuming that there are 4 channels per port,
4182                  * when multi-channel is enabled
4183                  */
4184                 if (be_is_qnq_mode(adapter))
4185                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4186                 else
4187                         /* In a non-qnq multichannel mode, the pvid
4188                          * takes up one vlan entry
4189                          */
4190                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4191         } else {
4192                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4193         }
4194
4195         res->max_mcast_mac = BE_MAX_MC;
4196
4197         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4198          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4199          *    *only* if it is RSS-capable.
4200          */
4201         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4202             be_virtfn(adapter) ||
4203             (be_is_mc(adapter) &&
4204              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4205                 res->max_tx_qs = 1;
4206         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4207                 struct be_resources super_nic_res = {0};
4208
4209                 /* On a SuperNIC profile, the driver needs to use the
4210                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4211                  */
4212                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4213                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4214                                           0);
4215                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4216                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4217         } else {
4218                 res->max_tx_qs = BE3_MAX_TX_QS;
4219         }
4220
4221         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4222             !use_sriov && be_physfn(adapter))
4223                 res->max_rss_qs = (adapter->be3_native) ?
4224                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4225         res->max_rx_qs = res->max_rss_qs + 1;
4226
4227         if (be_physfn(adapter))
4228                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4229                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4230         else
4231                 res->max_evt_qs = 1;
4232
4233         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4234         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4235         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4236                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4237 }
4238
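     /* Reset the adapter's soft state to defaults before be_setup()
      * (re)configures the function.
      */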
4239 static void be_setup_init(struct be_adapter *adapter)
4240 {
4241         adapter->vlan_prio_bmap = 0xff;
4242         adapter->phy.link_speed = -1;
4243         adapter->if_handle = -1;
4244         adapter->be3_native = false;
4245         adapter->if_flags = 0;
4246         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4247         if (be_physfn(adapter))
4248                 adapter->cmd_privileges = MAX_PRIVILEGES;
4249         else
4250                 adapter->cmd_privileges = MIN_PRIVILEGES;
4251 }
4252
4253 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4254  * However, this HW limitation is not exposed to the host via any SLI cmd.
4255  * As a result, in the case of SRIOV, and in particular in multi-partition
4256  * configs, the driver needs to calculate a proportional share of RSS Tables
4257  * per PF-pool for distribution among the VFs. This self-imposed limit
4258  * determines the number of VFs for which RSS can be enabled.
4259  */
4260 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4261 {
4262         struct be_port_resources port_res = {0};
4263         u8 rss_tables_on_port;
4264         u16 max_vfs = be_max_vfs(adapter);
4265
4266         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4267                                   RESOURCE_LIMITS, 0);
4268
4269         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4270
4271         /* Each PF Pool's RSS Tables limit =
4272          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4273          */
4274         adapter->pool_res.max_rss_tables =
4275                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4276 }
4277
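     /* Read the SR-IOV (PF-pool) resource limits from FW and account for any
      * VFs left enabled by a previous driver load.
      */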
4278 static int be_get_sriov_config(struct be_adapter *adapter)
4279 {
4280         struct be_resources res = {0};
4281         int max_vfs, old_vfs;
4282
4283         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4284                                   RESOURCE_LIMITS, 0);
4285
4286         /* Some old versions of BE3 FW don't report max_vfs value */
4287         if (BE3_chip(adapter) && !res.max_vfs) {
4288                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4289                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4290         }
4291
4292         adapter->pool_res = res;
4293
4294         /* If, during the previous unload of the driver, the VFs were not
4295          * disabled, then we cannot rely on the PF-pool limits for the
4296          * TotalVFs value. Instead use the TotalVFs value stored in the
4297          * pci_dev struct.
4298         old_vfs = pci_num_vf(adapter->pdev);
4299         if (old_vfs) {
4300                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4301                          old_vfs);
4302
4303                 adapter->pool_res.max_vfs =
4304                         pci_sriov_get_totalvfs(adapter->pdev);
4305                 adapter->num_vfs = old_vfs;
4306         }
4307
4308         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4309                 be_calculate_pf_pool_rss_tables(adapter);
4310                 dev_info(&adapter->pdev->dev,
4311                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4312                          be_max_pf_pool_rss_tables(adapter));
4313         }
4314         return 0;
4315 }
4316
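     /* Query the SR-IOV limits and, on Skyhawk with no VFs yet enabled,
      * distribute the PF-pool resources between the PF and future VFs.
      */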
4317 static void be_alloc_sriov_res(struct be_adapter *adapter)
4318 {
4319         int old_vfs = pci_num_vf(adapter->pdev);
4320         struct  be_resources vft_res = {0};
4321         int status;
4322
4323         be_get_sriov_config(adapter);
4324
4325         if (!old_vfs)
4326                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4327
4328         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4329          * resources are given to the PF during driver load, provided there
4330          * are no old VFs. This facility is not available in BE3 FW.
4331          * On Lancer chips this is done by the FW itself.
4332          */
4333         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4334                 be_calculate_vf_res(adapter, 0, &vft_res);
4335                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4336                                                  &vft_res);
4337                 if (status)
4338                         dev_err(&adapter->pdev->dev,
4339                                 "Failed to optimize SRIOV resources\n");
4340         }
4341 }
4342
4343 static int be_get_resources(struct be_adapter *adapter)
4344 {
4345         struct device *dev = &adapter->pdev->dev;
4346         struct be_resources res = {0};
4347         int status;
4348
4349         /* For Lancer, SH etc. read per-function resource limits from FW.
4350          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4351          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4352          */
4353         if (BEx_chip(adapter)) {
4354                 BEx_get_resources(adapter, &res);
4355         } else {
4356                 status = be_cmd_get_func_config(adapter, &res);
4357                 if (status)
4358                         return status;
4359
4360                 /* If a default RXQ must be created, we'll use up one RSSQ */
4361                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4362                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4363                         res.max_rss_qs -= 1;
4364         }
4365
4366         /* If RoCE is supported, stash away half the EQs for RoCE */
4367         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4368                                 res.max_evt_qs / 2 : res.max_evt_qs;
4369         adapter->res = res;
4370
4371         /* If FW supports RSS default queue, then skip creating non-RSS
4372          * queue for non-IP traffic.
4373          */
4374         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4375                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4376
4377         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4378                  be_max_txqs(adapter), be_max_rxqs(adapter),
4379                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4380                  be_max_vfs(adapter));
4381         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4382                  be_max_uc(adapter), be_max_mc(adapter),
4383                  be_max_vlans(adapter));
4384
4385         /* Ensure RX and TX queues are created in pairs at init time */
4386         adapter->cfg_num_rx_irqs =
4387                                 min_t(u16, netif_get_num_default_rss_queues(),
4388                                       be_max_qp_irqs(adapter));
4389         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4390         return 0;
4391 }
4392
4393 static int be_get_config(struct be_adapter *adapter)
4394 {
4395         int status, level;
4396         u16 profile_id;
4397
4398         status = be_cmd_get_cntl_attributes(adapter);
4399         if (status)
4400                 return status;
4401
4402         status = be_cmd_query_fw_cfg(adapter);
4403         if (status)
4404                 return status;
4405
4406         if (!lancer_chip(adapter) && be_physfn(adapter))
4407                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4408
4409         if (BEx_chip(adapter)) {
4410                 level = be_cmd_get_fw_log_level(adapter);
4411                 adapter->msg_enable =
4412                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4413         }
4414
4415         be_cmd_get_acpi_wol_cap(adapter);
4416         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4417         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4418
4419         be_cmd_query_port_name(adapter);
4420
4421         if (be_physfn(adapter)) {
4422                 status = be_cmd_get_active_profile(adapter, &profile_id);
4423                 if (!status)
4424                         dev_info(&adapter->pdev->dev,
4425                                  "Using profile 0x%x\n", profile_id);
4426         }
4427
4428         return 0;
4429 }
4430
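     /* If no MAC address is assigned yet, read the permanent MAC from FW and
      * use it as the netdev's current and permanent address.
      */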
4431 static int be_mac_setup(struct be_adapter *adapter)
4432 {
4433         u8 mac[ETH_ALEN];
4434         int status;
4435
4436         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4437                 status = be_cmd_get_perm_mac(adapter, mac);
4438                 if (status)
4439                         return status;
4440
4441                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4442                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4443
4444                 /* Initial MAC for BE3 VFs is already programmed by PF */
4445                 if (BEx_chip(adapter) && be_virtfn(adapter))
4446                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4447         }
4448
4449         return 0;
4450 }
4451
4452 static void be_schedule_worker(struct be_adapter *adapter)
4453 {
4454         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4455         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4456 }
4457
4458 static void be_destroy_err_recovery_workq(void)
4459 {
4460         if (!be_err_recovery_workq)
4461                 return;
4462
4463         flush_workqueue(be_err_recovery_workq);
4464         destroy_workqueue(be_err_recovery_workq);
4465         be_err_recovery_workq = NULL;
4466 }
4467
4468 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4469 {
4470         struct be_error_recovery *err_rec = &adapter->error_recovery;
4471
4472         if (!be_err_recovery_workq)
4473                 return;
4474
4475         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4476                            msecs_to_jiffies(delay));
4477         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4478 }
4479
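     /* Create EQs, TX, RX and MCC queues and publish the real queue counts to
      * the network stack (the netif_set_real_num_*() calls need rtnl_lock).
      */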
4480 static int be_setup_queues(struct be_adapter *adapter)
4481 {
4482         struct net_device *netdev = adapter->netdev;
4483         int status;
4484
4485         status = be_evt_queues_create(adapter);
4486         if (status)
4487                 goto err;
4488
4489         status = be_tx_qs_create(adapter);
4490         if (status)
4491                 goto err;
4492
4493         status = be_rx_cqs_create(adapter);
4494         if (status)
4495                 goto err;
4496
4497         status = be_mcc_queues_create(adapter);
4498         if (status)
4499                 goto err;
4500
4501         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4502         if (status)
4503                 goto err;
4504
4505         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4506         if (status)
4507                 goto err;
4508
4509         return 0;
4510 err:
4511         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4512         return status;
4513 }
4514
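     /* Allocate the MAC/multicast filter tables and create the FW interface
      * (if_handle) with the capability flags supported by this function.
      */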
4515 static int be_if_create(struct be_adapter *adapter)
4516 {
4517         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4518         u32 cap_flags = be_if_cap_flags(adapter);
4519         int status;
4520
4521         /* alloc required memory for other filtering fields */
4522         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4523                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4524         if (!adapter->pmac_id)
4525                 return -ENOMEM;
4526
4527         adapter->mc_list = kcalloc(be_max_mc(adapter),
4528                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4529         if (!adapter->mc_list)
4530                 return -ENOMEM;
4531
4532         adapter->uc_list = kcalloc(be_max_uc(adapter),
4533                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4534         if (!adapter->uc_list)
4535                 return -ENOMEM;
4536
4537         if (adapter->cfg_num_rx_irqs == 1)
4538                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4539
4540         en_flags &= cap_flags;
4541         /* will enable all the needed filter flags in be_open() */
4542         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4543                                   &adapter->if_handle, 0);
4544
4545         if (status)
4546                 return status;
4547
4548         return 0;
4549 }
4550
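     /* Destroy and re-create the interface and all queues with the current
      * configuration; the netdev is closed and re-opened around the operation
      * if it was running.
      */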
4551 int be_update_queues(struct be_adapter *adapter)
4552 {
4553         struct net_device *netdev = adapter->netdev;
4554         int status;
4555
4556         if (netif_running(netdev))
4557                 be_close(netdev);
4558
4559         be_cancel_worker(adapter);
4560
4561         /* If any vectors have been shared with RoCE, we cannot re-program
4562          * the MSI-X table.
4563          */
4564         if (!adapter->num_msix_roce_vec)
4565                 be_msix_disable(adapter);
4566
4567         be_clear_queues(adapter);
4568         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4569         if (status)
4570                 return status;
4571
4572         if (!msix_enabled(adapter)) {
4573                 status = be_msix_enable(adapter);
4574                 if (status)
4575                         return status;
4576         }
4577
4578         status = be_if_create(adapter);
4579         if (status)
4580                 return status;
4581
4582         status = be_setup_queues(adapter);
4583         if (status)
4584                 return status;
4585
4586         be_schedule_worker(adapter);
4587
4588         if (netif_running(netdev))
4589                 status = be_open(netdev);
4590
4591         return status;
4592 }
4593
4594 static inline int fw_major_num(const char *fw_ver)
4595 {
4596         int fw_major = 0, i;
4597
4598         i = sscanf(fw_ver, "%d.", &fw_major);
4599         if (i != 1)
4600                 return 0;
4601
4602         return fw_major;
4603 }
4604
4605 /* During error recovery, always FLR the PF.
4606  * Otherwise, FLR the PF only if no VFs are already enabled.
4607  */
4608 static bool be_reset_required(struct be_adapter *adapter)
4609 {
4610         if (be_error_recovering(adapter))
4611                 return true;
4612         else
4613                 return pci_num_vf(adapter->pdev) == 0;
4614 }
4615
4616 /* Wait for the FW to be ready and perform the required initialization */
4617 static int be_func_init(struct be_adapter *adapter)
4618 {
4619         int status;
4620
4621         status = be_fw_wait_ready(adapter);
4622         if (status)
4623                 return status;
4624
4625         /* FW is now ready; clear errors to allow cmds/doorbell */
4626         be_clear_error(adapter, BE_CLEAR_ALL);
4627
4628         if (be_reset_required(adapter)) {
4629                 status = be_cmd_reset_function(adapter);
4630                 if (status)
4631                         return status;
4632
4633                 /* Wait for interrupts to quiesce after an FLR */
4634                 msleep(100);
4635         }
4636
4637         /* Tell FW we're ready to fire cmds */
4638         status = be_cmd_fw_init(adapter);
4639         if (status)
4640                 return status;
4641
4642         /* Allow interrupts for other ULPs running on NIC function */
4643         be_intr_set(adapter, true);
4644
4645         return 0;
4646 }
4647
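     /* Bring the function from a freshly reset state to a fully configured
      * one: query FW config and resources, enable MSI-X, create the interface
      * and queues, and set up MAC, flow control and (optionally) VFs.
      */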
4648 static int be_setup(struct be_adapter *adapter)
4649 {
4650         struct device *dev = &adapter->pdev->dev;
4651         int status;
4652
4653         status = be_func_init(adapter);
4654         if (status)
4655                 return status;
4656
4657         be_setup_init(adapter);
4658
4659         if (!lancer_chip(adapter))
4660                 be_cmd_req_native_mode(adapter);
4661
4662         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4663          * for issuing profile-related cmds.
4664          */
4665         if (!BEx_chip(adapter)) {
4666                 status = be_cmd_get_func_config(adapter, NULL);
4667                 if (status)
4668                         return status;
4669         }
4670
4671         status = be_get_config(adapter);
4672         if (status)
4673                 goto err;
4674
4675         if (!BE2_chip(adapter) && be_physfn(adapter))
4676                 be_alloc_sriov_res(adapter);
4677
4678         status = be_get_resources(adapter);
4679         if (status)
4680                 goto err;
4681
4682         status = be_msix_enable(adapter);
4683         if (status)
4684                 goto err;
4685
4686         /* will enable all the needed filter flags in be_open() */
4687         status = be_if_create(adapter);
4688         if (status)
4689                 goto err;
4690
4691         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4692         rtnl_lock();
4693         status = be_setup_queues(adapter);
4694         rtnl_unlock();
4695         if (status)
4696                 goto err;
4697
4698         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4699
4700         status = be_mac_setup(adapter);
4701         if (status)
4702                 goto err;
4703
4704         be_cmd_get_fw_ver(adapter);
4705         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4706
4707         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4708                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4709                         adapter->fw_ver);
4710                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4711         }
4712
4713         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4714                                          adapter->rx_fc);
4715         if (status)
4716                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4717                                         &adapter->rx_fc);
4718
4719         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4720                  adapter->tx_fc, adapter->rx_fc);
4721
4722         if (be_physfn(adapter))
4723                 be_cmd_set_logical_link_config(adapter,
4724                                                IFLA_VF_LINK_STATE_AUTO, 0);
4725
4726         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's
4727          * vport, confusing any Linux bridge or OVS it might be connected to.
4728          * When SRIOV is not enabled, set the EVB to PASSTHRU mode, which
4729          * effectively disables the EVB.
4730          */
4731         if (BE3_chip(adapter))
4732                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4733                                       PORT_FWD_TYPE_PASSTHRU, 0);
4734
4735         if (adapter->num_vfs)
4736                 be_vf_setup(adapter);
4737
4738         status = be_cmd_get_phy_info(adapter);
4739         if (!status && be_pause_supported(adapter))
4740                 adapter->phy.fc_autoneg = 1;
4741
4742         if (be_physfn(adapter) && !lancer_chip(adapter))
4743                 be_cmd_set_features(adapter);
4744
4745         be_schedule_worker(adapter);
4746         adapter->flags |= BE_FLAGS_SETUP_DONE;
4747         return 0;
4748 err:
4749         be_clear(adapter);
4750         return status;
4751 }
4752
4753 #ifdef CONFIG_NET_POLL_CONTROLLER
4754 static void be_netpoll(struct net_device *netdev)
4755 {
4756         struct be_adapter *adapter = netdev_priv(netdev);
4757         struct be_eq_obj *eqo;
4758         int i;
4759
4760         for_all_evt_queues(adapter, eqo, i) {
4761                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4762                 napi_schedule(&eqo->napi);
4763         }
4764 }
4765 #endif
4766
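     /* Flash the firmware image named by fw_file; allowed only while the
      * interface is up.
      */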
4767 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4768 {
4769         const struct firmware *fw;
4770         int status;
4771
4772         if (!netif_running(adapter->netdev)) {
4773                 dev_err(&adapter->pdev->dev,
4774                         "Firmware load not allowed (interface is down)\n");
4775                 return -ENETDOWN;
4776         }
4777
4778         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4779         if (status)
4780                 goto fw_exit;
4781
4782         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4783
4784         if (lancer_chip(adapter))
4785                 status = lancer_fw_download(adapter, fw);
4786         else
4787                 status = be_fw_download(adapter, fw);
4788
4789         if (!status)
4790                 be_cmd_get_fw_ver(adapter);
4791
4792 fw_exit:
4793         release_firmware(fw);
4794         return status;
4795 }
4796
4797 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4798                                  u16 flags)
4799 {
4800         struct be_adapter *adapter = netdev_priv(dev);
4801         struct nlattr *attr, *br_spec;
4802         int rem;
4803         int status = 0;
4804         u16 mode = 0;
4805
4806         if (!sriov_enabled(adapter))
4807                 return -EOPNOTSUPP;
4808
4809         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4810         if (!br_spec)
4811                 return -EINVAL;
4812
4813         nla_for_each_nested(attr, br_spec, rem) {
4814                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4815                         continue;
4816
4817                 if (nla_len(attr) < sizeof(mode))
4818                         return -EINVAL;
4819
4820                 mode = nla_get_u16(attr);
4821                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4822                         return -EOPNOTSUPP;
4823
4824                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4825                         return -EINVAL;
4826
4827                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4828                                                adapter->if_handle,
4829                                                mode == BRIDGE_MODE_VEPA ?
4830                                                PORT_FWD_TYPE_VEPA :
4831                                                PORT_FWD_TYPE_VEB, 0);
4832                 if (status)
4833                         goto err;
4834
4835                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4836                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4837
4838                 return status;
4839         }
4840 err:
4841         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4842                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4843
4844         return status;
4845 }
4846
4847 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4848                                  struct net_device *dev, u32 filter_mask,
4849                                  int nlflags)
4850 {
4851         struct be_adapter *adapter = netdev_priv(dev);
4852         int status = 0;
4853         u8 hsw_mode;
4854
4855         /* BE and Lancer chips support VEB mode only */
4856         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4857                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4858                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4859                         return 0;
4860                 hsw_mode = PORT_FWD_TYPE_VEB;
4861         } else {
4862                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4863                                                adapter->if_handle, &hsw_mode,
4864                                                NULL);
4865                 if (status)
4866                         return 0;
4867
4868                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4869                         return 0;
4870         }
4871
4872         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4873                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4874                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4875                                        0, 0, nlflags, filter_mask, NULL);
4876 }
4877
4878 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4879                                          void (*func)(struct work_struct *))
4880 {
4881         struct be_cmd_work *work;
4882
4883         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4884         if (!work) {
4885                 dev_err(&adapter->pdev->dev,
4886                         "be_work memory allocation failed\n");
4887                 return NULL;
4888         }
4889
4890         INIT_WORK(&work->work, func);
4891         work->adapter = adapter;
4892         return work;
4893 }
4894
4895 /* VxLAN offload Notes:
4896  *
4897  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4898  * distinguish the various types of transports (VxLAN, GRE, NVGRE, ...), so
4899  * offload is expected to work across all types of IP tunnels once exported.
4900  * Skyhawk supports offloads for either VxLAN or NVGRE, exclusively. So we
4901  * export VxLAN offloads in hw_enc_features only when a VxLAN port is added.
4902  * If other (non-VxLAN) tunnels are configured while VxLAN offloads are
4903  * enabled, offloads for those tunnels are unexported on the fly through
4904  * ndo_features_check().
4905  *
4906  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4907  * adds more than one port, disable offloads until all the tunnels are removed.
4908  */
4909 static void be_work_add_vxlan_port(struct work_struct *work)
4910 {
4911         struct be_cmd_work *cmd_work =
4912                                 container_of(work, struct be_cmd_work, work);
4913         struct be_adapter *adapter = cmd_work->adapter;
4914         struct net_device *netdev = adapter->netdev;
4915         struct device *dev = &adapter->pdev->dev;
4916         __be16 port = cmd_work->info.vxlan_port;
4917         int status;
4918
4919         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
4920                 adapter->vxlan_port_aliases++;
4921                 goto done;
4922         }
4923
4924         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4925                 dev_info(dev,
4926                          "Only one UDP port supported for VxLAN offloads\n");
4927                 dev_info(dev, "Disabling VxLAN offloads\n");
4928                 adapter->vxlan_port_count++;
4929                 goto err;
4930         }
4931
4932         if (adapter->vxlan_port_count++ >= 1)
4933                 goto done;
4934
4935         status = be_cmd_manage_iface(adapter, adapter->if_handle,
4936                                      OP_CONVERT_NORMAL_TO_TUNNEL);
4937         if (status) {
4938                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
4939                 goto err;
4940         }
4941
4942         status = be_cmd_set_vxlan_port(adapter, port);
4943         if (status) {
4944                 dev_warn(dev, "Failed to add VxLAN port\n");
4945                 goto err;
4946         }
4947         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
4948         adapter->vxlan_port = port;
4949
4950         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4951                                    NETIF_F_TSO | NETIF_F_TSO6 |
4952                                    NETIF_F_GSO_UDP_TUNNEL;
4953         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
4954         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
4955
4956         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4957                  be16_to_cpu(port));
4958         goto done;
4959 err:
4960         be_disable_vxlan_offloads(adapter);
4961 done:
4962         kfree(cmd_work);
4963 }
4964
4965 static void be_work_del_vxlan_port(struct work_struct *work)
4966 {
4967         struct be_cmd_work *cmd_work =
4968                                 container_of(work, struct be_cmd_work, work);
4969         struct be_adapter *adapter = cmd_work->adapter;
4970         __be16 port = cmd_work->info.vxlan_port;
4971
4972         if (adapter->vxlan_port != port)
4973                 goto done;
4974
4975         if (adapter->vxlan_port_aliases) {
4976                 adapter->vxlan_port_aliases--;
4977                 goto out;
4978         }
4979
4980         be_disable_vxlan_offloads(adapter);
4981
4982         dev_info(&adapter->pdev->dev,
4983                  "Disabled VxLAN offloads for UDP port %d\n",
4984                  be16_to_cpu(port));
4985 done:
4986         adapter->vxlan_port_count--;
4987 out:
4988         kfree(cmd_work);
4989 }
4990
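     /* Common helper for VxLAN port add/del: ignore non-VxLAN tunnels and
      * unsupported configs, and defer the FW cmds to a work item on be_wq.
      */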
4991 static void be_cfg_vxlan_port(struct net_device *netdev,
4992                               struct udp_tunnel_info *ti,
4993                               void (*func)(struct work_struct *))
4994 {
4995         struct be_adapter *adapter = netdev_priv(netdev);
4996         struct be_cmd_work *cmd_work;
4997
4998         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4999                 return;
5000
5001         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5002                 return;
5003
5004         cmd_work = be_alloc_work(adapter, func);
5005         if (cmd_work) {
5006                 cmd_work->info.vxlan_port = ti->port;
5007                 queue_work(be_wq, &cmd_work->work);
5008         }
5009 }
5010
5011 static void be_del_vxlan_port(struct net_device *netdev,
5012                               struct udp_tunnel_info *ti)
5013 {
5014         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5015 }
5016
5017 static void be_add_vxlan_port(struct net_device *netdev,
5018                               struct udp_tunnel_info *ti)
5019 {
5020         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5021 }
5022
5023 static netdev_features_t be_features_check(struct sk_buff *skb,
5024                                            struct net_device *dev,
5025                                            netdev_features_t features)
5026 {
5027         struct be_adapter *adapter = netdev_priv(dev);
5028         u8 l4_hdr = 0;
5029
5030         /* The code below restricts offload features for some tunneled and
5031          * Q-in-Q packets.
5032          * Offload features for normal (non-tunnel) packets are unchanged.
5033          */
5034         features = vlan_features_check(skb, features);
5035         if (!skb->encapsulation ||
5036             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5037                 return features;
5038
5039         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5040          * should disable tunnel offload features if it's not a VxLAN packet,
5041          * as tunnel offloads have been enabled only for VxLAN. This is done to
5042          * allow other tunneled traffic, such as GRE, to work while VxLAN
5043          * offloads are configured in Skyhawk-R.
5044          */
5045         switch (vlan_get_protocol(skb)) {
5046         case htons(ETH_P_IP):
5047                 l4_hdr = ip_hdr(skb)->protocol;
5048                 break;
5049         case htons(ETH_P_IPV6):
5050                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5051                 break;
5052         default:
5053                 return features;
5054         }
5055
5056         if (l4_hdr != IPPROTO_UDP ||
5057             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5058             skb->inner_protocol != htons(ETH_P_TEB) ||
5059             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5060                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5061             !adapter->vxlan_port ||
5062             udp_hdr(skb)->dest != adapter->vxlan_port)
5063                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5064
5065         return features;
5066 }
5067
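     /* Compose the physical port id from the HBA port number and the
      * controller serial number words.
      */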
5068 static int be_get_phys_port_id(struct net_device *dev,
5069                                struct netdev_phys_item_id *ppid)
5070 {
5071         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5072         struct be_adapter *adapter = netdev_priv(dev);
5073         u8 *id;
5074
5075         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5076                 return -ENOSPC;
5077
5078         ppid->id[0] = adapter->hba_port_num + 1;
5079         id = &ppid->id[1];
5080         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5081              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5082                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5083
5084         ppid->id_len = id_len;
5085
5086         return 0;
5087 }
5088
5089 static void be_set_rx_mode(struct net_device *dev)
5090 {
5091         struct be_adapter *adapter = netdev_priv(dev);
5092         struct be_cmd_work *work;
5093
5094         work = be_alloc_work(adapter, be_work_set_rx_mode);
5095         if (work)
5096                 queue_work(be_wq, &work->work);
5097 }
5098
5099 static const struct net_device_ops be_netdev_ops = {
5100         .ndo_open               = be_open,
5101         .ndo_stop               = be_close,
5102         .ndo_start_xmit         = be_xmit,
5103         .ndo_set_rx_mode        = be_set_rx_mode,
5104         .ndo_set_mac_address    = be_mac_addr_set,
5105         .ndo_get_stats64        = be_get_stats64,
5106         .ndo_validate_addr      = eth_validate_addr,
5107         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5108         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5109         .ndo_set_vf_mac         = be_set_vf_mac,
5110         .ndo_set_vf_vlan        = be_set_vf_vlan,
5111         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5112         .ndo_get_vf_config      = be_get_vf_config,
5113         .ndo_set_vf_link_state  = be_set_vf_link_state,
5114         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5115 #ifdef CONFIG_NET_POLL_CONTROLLER
5116         .ndo_poll_controller    = be_netpoll,
5117 #endif
5118         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5119         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5120         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5121         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5122         .ndo_features_check     = be_features_check,
5123         .ndo_get_phys_port_id   = be_get_phys_port_id,
5124 };
5125
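     /* Advertise the features/offloads supported by this function and hook up
      * the netdev_ops, ethtool_ops and MTU limits.
      */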
5126 static void be_netdev_init(struct net_device *netdev)
5127 {
5128         struct be_adapter *adapter = netdev_priv(netdev);
5129
5130         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5131                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5132                 NETIF_F_HW_VLAN_CTAG_TX;
5133         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5134                 netdev->hw_features |= NETIF_F_RXHASH;
5135
5136         netdev->features |= netdev->hw_features |
5137                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5138
5139         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5140                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5141
5142         netdev->priv_flags |= IFF_UNICAST_FLT;
5143
5144         netdev->flags |= IFF_MULTICAST;
5145
5146         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5147
5148         netdev->netdev_ops = &be_netdev_ops;
5149
5150         netdev->ethtool_ops = &be_ethtool_ops;
5151
5152         /* MTU range: 256 - 9000 */
5153         netdev->min_mtu = BE_MIN_MTU;
5154         netdev->max_mtu = BE_MAX_MTU;
5155 }
5156
5157 static void be_cleanup(struct be_adapter *adapter)
5158 {
5159         struct net_device *netdev = adapter->netdev;
5160
5161         rtnl_lock();
5162         netif_device_detach(netdev);
5163         if (netif_running(netdev))
5164                 be_close(netdev);
5165         rtnl_unlock();
5166
5167         be_clear(adapter);
5168 }
5169
5170 static int be_resume(struct be_adapter *adapter)
5171 {
5172         struct net_device *netdev = adapter->netdev;
5173         int status;
5174
5175         status = be_setup(adapter);
5176         if (status)
5177                 return status;
5178
5179         rtnl_lock();
5180         if (netif_running(netdev))
5181                 status = be_open(netdev);
5182         rtnl_unlock();
5183
5184         if (status)
5185                 return status;
5186
5187         netif_device_attach(netdev);
5188
5189         return 0;
5190 }
5191
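     /* Initiate a chip soft reset by setting the SR bit in the SLIPORT
      * soft-reset register.
      */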
5192 static void be_soft_reset(struct be_adapter *adapter)
5193 {
5194         u32 val;
5195
5196         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5197         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5198         val |= SLIPORT_SOFTRESET_SR_MASK;
5199         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5200 }
5201
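     /* Check whether the detected HW error meets the recovery criteria: a
      * recoverable POST error code, enough idle time since probe, enough time
      * since the last recovery, and not the same error code twice in a row.
      */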
5202 static bool be_err_is_recoverable(struct be_adapter *adapter)
5203 {
5204         struct be_error_recovery *err_rec = &adapter->error_recovery;
5205         unsigned long initial_idle_time =
5206                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5207         unsigned long recovery_interval =
5208                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5209         u16 ue_err_code;
5210         u32 val;
5211
5212         val = be_POST_stage_get(adapter);
5213         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5214                 return false;
5215         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5216         if (ue_err_code == 0)
5217                 return false;
5218
5219         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5220                 ue_err_code);
5221
5222         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5223                 dev_err(&adapter->pdev->dev,
5224                         "Cannot recover within %lu sec from driver load\n",
5225                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5226                 return false;
5227         }
5228
5229         if (err_rec->last_recovery_time &&
5230             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5231                 dev_err(&adapter->pdev->dev,
5232                         "Cannot recover within %lu sec from last recovery\n",
5233                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5234                 return false;
5235         }
5236
5237         if (ue_err_code == err_rec->last_err_code) {
5238                 dev_err(&adapter->pdev->dev,
5239                         "Cannot recover from a consecutive TPE error\n");
5240                 return false;
5241         }
5242
5243         err_rec->last_recovery_time = jiffies;
5244         err_rec->last_err_code = ue_err_code;
5245         return true;
5246 }
5247
5248 static int be_tpe_recover(struct be_adapter *adapter)
5249 {
5250         struct be_error_recovery *err_rec = &adapter->error_recovery;
5251         int status = -EAGAIN;
5252         u32 val;
5253
5254         switch (err_rec->recovery_state) {
5255         case ERR_RECOVERY_ST_NONE:
5256                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5257                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5258                 break;
5259
5260         case ERR_RECOVERY_ST_DETECT:
5261                 val = be_POST_stage_get(adapter);
5262                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5263                     POST_STAGE_RECOVERABLE_ERR) {
5264                         dev_err(&adapter->pdev->dev,
5265                                 "Unrecoverable HW error detected: 0x%x\n", val);
5266                         status = -EINVAL;
5267                         err_rec->resched_delay = 0;
5268                         break;
5269                 }
5270
5271                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5272
5273                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5274                  * milliseconds before it checks for final error status in
5275                  * SLIPORT_SEMAPHORE to determine if the recovery criteria
5276                  * are met. If they are, PF0 initiates a Soft Reset.
5277                  */
5278                 if (adapter->pf_num == 0) {
5279                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5280                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5281                                         ERR_RECOVERY_UE_DETECT_DURATION;
5282                         break;
5283                 }
5284
5285                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5286                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5287                                         ERR_RECOVERY_UE_DETECT_DURATION;
5288                 break;
5289
5290         case ERR_RECOVERY_ST_RESET:
5291                 if (!be_err_is_recoverable(adapter)) {
5292                         dev_err(&adapter->pdev->dev,
5293                                 "Failed to meet recovery criteria\n");
5294                         status = -EIO;
5295                         err_rec->resched_delay = 0;
5296                         break;
5297                 }
5298                 be_soft_reset(adapter);
5299                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5300                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5301                                         err_rec->ue_to_reset_time;
5302                 break;
5303
5304         case ERR_RECOVERY_ST_PRE_POLL:
5305                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5306                 err_rec->resched_delay = 0;
5307                 status = 0;                     /* done */
5308                 break;
5309
5310         default:
5311                 status = -EINVAL;
5312                 err_rec->resched_delay = 0;
5313                 break;
5314         }
5315
5316         return status;
5317 }
5318
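     /* Full recovery path: run the TPE recovery steps on non-Lancer chips,
      * wait for FW to become ready, then tear down and re-initialize the
      * function.
      */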
5319 static int be_err_recover(struct be_adapter *adapter)
5320 {
5321         int status;
5322
5323         if (!lancer_chip(adapter)) {
5324                 if (!adapter->error_recovery.recovery_supported ||
5325                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5326                         return -EIO;
5327                 status = be_tpe_recover(adapter);
5328                 if (status)
5329                         goto err;
5330         }
5331
5332         /* Wait for the adapter to reach a quiescent state before
5333          * destroying queues
5334          */
5335         status = be_fw_wait_ready(adapter);
5336         if (status)
5337                 goto err;
5338
5339         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5340
5341         be_cleanup(adapter);
5342
5343         status = be_resume(adapter);
5344         if (status)
5345                 goto err;
5346
5347         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5348
5349 err:
5350         return status;
5351 }
5352
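     /* Periodic error-detection work: detect HW errors, attempt recovery and
      * reschedule itself with a delay that depends on the recovery state.
      */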
5353 static void be_err_detection_task(struct work_struct *work)
5354 {
5355         struct be_error_recovery *err_rec =
5356                         container_of(work, struct be_error_recovery,
5357                                      err_detection_work.work);
5358         struct be_adapter *adapter =
5359                         container_of(err_rec, struct be_adapter,
5360                                      error_recovery);
5361         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5362         struct device *dev = &adapter->pdev->dev;
5363         int recovery_status;
5364
5365         be_detect_error(adapter);
5366         if (!be_check_error(adapter, BE_ERROR_HW))
5367                 goto reschedule_task;
5368
5369         recovery_status = be_err_recover(adapter);
5370         if (!recovery_status) {
5371                 err_rec->recovery_retries = 0;
5372                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5373                 dev_info(dev, "Adapter recovery successful\n");
5374                 goto reschedule_task;
5375         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5376                 /* BEx/SH recovery state machine */
5377                 if (adapter->pf_num == 0 &&
5378                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5379                         dev_err(&adapter->pdev->dev,
5380                                 "Adapter recovery in progress\n");
5381                 resched_delay = err_rec->resched_delay;
5382                 goto reschedule_task;
5383         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5384                 /* For VFs, check every second whether the PF has
5385                  * allocated resources.
5386                  */
5387                 dev_err(dev, "Re-trying adapter recovery\n");
5388                 goto reschedule_task;
5389         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5390                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5391                 /* If another error occurs during recovery, it takes 30 sec
5392                  * for the adapter to come out of the error state. Retry
5393                  * error recovery after this time interval.
5394                  */
5395                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5396                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5397                 goto reschedule_task;
5398         } else {
5399                 dev_err(dev, "Adapter recovery failed\n");
5400                 dev_err(dev, "Please reboot server to recover\n");
5401         }
5402
5403         return;
5404
5405 reschedule_task:
5406         be_schedule_err_detection(adapter, resched_delay);
5407 }
5408
5409 static void be_log_sfp_info(struct be_adapter *adapter)
5410 {
5411         int status;
5412
5413         status = be_cmd_query_sfp_info(adapter);
5414         if (!status) {
5415                 dev_err(&adapter->pdev->dev,
5416                         "Port %c: %s Vendor: %s part no: %s\n",
5417                         adapter->port_name,
5418                         be_misconfig_evt_port_state[adapter->phy_state],
5419                         adapter->phy.vendor_name,
5420                         adapter->phy.vendor_pn);
5421         }
5422         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5423 }
5424
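     /* Periodic (1 second) housekeeping: reap MCC completions, query die
      * temperature, refresh stats, replenish starved RX queues and update
      * EQ delays.
      */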
5425 static void be_worker(struct work_struct *work)
5426 {
5427         struct be_adapter *adapter =
5428                 container_of(work, struct be_adapter, work.work);
5429         struct be_rx_obj *rxo;
5430         int i;
5431
5432         if (be_physfn(adapter) &&
5433             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5434                 be_cmd_get_die_temperature(adapter);
5435
5436         /* when interrupts are not yet enabled, just reap any pending
5437          * mcc completions
5438          */
5439         if (!netif_running(adapter->netdev)) {
5440                 local_bh_disable();
5441                 be_process_mcc(adapter);
5442                 local_bh_enable();
5443                 goto reschedule;
5444         }
5445
5446         if (!adapter->stats_cmd_sent) {
5447                 if (lancer_chip(adapter))
5448                         lancer_cmd_get_pport_stats(adapter,
5449                                                    &adapter->stats_cmd);
5450                 else
5451                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5452         }
5453
5454         for_all_rx_queues(adapter, rxo, i) {
5455                 /* Replenish RX-queues starved due to memory
5456                  * allocation failures.
5457                  */
5458                 if (rxo->rx_post_starved)
5459                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5460         }
5461
5462         /* EQ-delay update for Skyhawk is done while notifying EQ */
5463         if (!skyhawk_chip(adapter))
5464                 be_eqd_update(adapter, false);
5465
5466         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5467                 be_log_sfp_info(adapter);
5468
5469 reschedule:
5470         adapter->work_counter++;
5471         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5472 }
5473
5474 static void be_unmap_pci_bars(struct be_adapter *adapter)
5475 {
5476         if (adapter->csr)
5477                 pci_iounmap(adapter->pdev, adapter->csr);
5478         if (adapter->db)
5479                 pci_iounmap(adapter->pdev, adapter->db);
5480         if (adapter->pcicfg && adapter->pcicfg_mapped)
5481                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5482 }
5483
5484 static int db_bar(struct be_adapter *adapter)
5485 {
5486         if (lancer_chip(adapter) || be_virtfn(adapter))
5487                 return 0;
5488         else
5489                 return 4;
5490 }
5491
5492 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5493 {
5494         if (skyhawk_chip(adapter)) {
5495                 adapter->roce_db.size = 4096;
5496                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5497                                                               db_bar(adapter));
5498                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5499                                                                db_bar(adapter));
5500         }
5501         return 0;
5502 }
5503
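     /* Map the CSR, doorbell and PCICFG BARs needed by this chip/function and
      * record whether we are running as a VF.
      */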
5504 static int be_map_pci_bars(struct be_adapter *adapter)
5505 {
5506         struct pci_dev *pdev = adapter->pdev;
5507         u8 __iomem *addr;
5508         u32 sli_intf;
5509
5510         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5511         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5512                                 SLI_INTF_FAMILY_SHIFT;
5513         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5514
5515         if (BEx_chip(adapter) && be_physfn(adapter)) {
5516                 adapter->csr = pci_iomap(pdev, 2, 0);
5517                 if (!adapter->csr)
5518                         return -ENOMEM;
5519         }
5520
5521         addr = pci_iomap(pdev, db_bar(adapter), 0);
5522         if (!addr)
5523                 goto pci_map_err;
5524         adapter->db = addr;
5525
5526         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5527                 if (be_physfn(adapter)) {
5528                         /* PCICFG is the 2nd BAR in BE2 */
5529                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5530                         if (!addr)
5531                                 goto pci_map_err;
5532                         adapter->pcicfg = addr;
5533                         adapter->pcicfg_mapped = true;
5534                 } else {
5535                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5536                         adapter->pcicfg_mapped = false;
5537                 }
5538         }
5539
5540         be_roce_map_pci_bars(adapter);
5541         return 0;
5542
5543 pci_map_err:
5544         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5545         be_unmap_pci_bars(adapter);
5546         return -ENOMEM;
5547 }
5548
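     /* Free the DMA memory (mailbox, rx_filter, stats) allocated by
      * be_drv_init().
      */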
5549 static void be_drv_cleanup(struct be_adapter *adapter)
5550 {
5551         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5552         struct device *dev = &adapter->pdev->dev;
5553
5554         if (mem->va)
5555                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5556
5557         mem = &adapter->rx_filter;
5558         if (mem->va)
5559                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5560
5561         mem = &adapter->stats_cmd;
5562         if (mem->va)
5563                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5564 }
5565
5566 /* Allocate and initialize various fields in be_adapter struct */
5567 static int be_drv_init(struct be_adapter *adapter)
5568 {
5569         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5570         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5571         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5572         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5573         struct device *dev = &adapter->pdev->dev;
5574         int status = 0;
5575
5576         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5577         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5578                                                  &mbox_mem_alloc->dma,
5579                                                  GFP_KERNEL);
5580         if (!mbox_mem_alloc->va)
5581                 return -ENOMEM;
5582
5583         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5584         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5585         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5586
5587         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5588         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5589                                             &rx_filter->dma, GFP_KERNEL);
5590         if (!rx_filter->va) {
5591                 status = -ENOMEM;
5592                 goto free_mbox;
5593         }
5594
5595         if (lancer_chip(adapter))
5596                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5597         else if (BE2_chip(adapter))
5598                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5599         else if (BE3_chip(adapter))
5600                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5601         else
5602                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5603         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5604                                             &stats_cmd->dma, GFP_KERNEL);
5605         if (!stats_cmd->va) {
5606                 status = -ENOMEM;
5607                 goto free_rx_filter;
5608         }
5609
5610         mutex_init(&adapter->mbox_lock);
5611         mutex_init(&adapter->mcc_lock);
5612         mutex_init(&adapter->rx_filter_lock);
5613         spin_lock_init(&adapter->mcc_cq_lock);
5614         init_completion(&adapter->et_cmd_compl);
5615
5616         pci_save_state(adapter->pdev);
5617
5618         INIT_DELAYED_WORK(&adapter->work, be_worker);
5619
5620         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5621         adapter->error_recovery.resched_delay = 0;
5622         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5623                           be_err_detection_task);
5624
5625         adapter->rx_fc = true;
5626         adapter->tx_fc = true;
5627
5628         /* Must be a power of 2 or else MODULO will BUG_ON */
5629         adapter->be_get_temp_freq = 64;
5630
5631         return 0;
5632
5633 free_rx_filter:
5634         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5635 free_mbox:
5636         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5637                           mbox_mem_alloc->dma);
5638         return status;
5639 }
5640
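     /* PCI remove handler: unregister the netdev, reset the function if no
      * VFs are assigned, and release all driver and PCI resources.
      */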
5641 static void be_remove(struct pci_dev *pdev)
5642 {
5643         struct be_adapter *adapter = pci_get_drvdata(pdev);
5644
5645         if (!adapter)
5646                 return;
5647
5648         be_roce_dev_remove(adapter);
5649         be_intr_set(adapter, false);
5650
5651         be_cancel_err_detection(adapter);
5652
5653         unregister_netdev(adapter->netdev);
5654
5655         be_clear(adapter);
5656
5657         if (!pci_vfs_assigned(adapter->pdev))
5658                 be_cmd_reset_function(adapter);
5659
5660         /* tell fw we're done with firing cmds */
5661         be_cmd_fw_clean(adapter);
5662
5663         be_unmap_pci_bars(adapter);
5664         be_drv_cleanup(adapter);
5665
5666         pci_disable_pcie_error_reporting(pdev);
5667
5668         pci_release_regions(pdev);
5669         pci_disable_device(pdev);
5670
5671         free_netdev(adapter->netdev);
5672 }
5673
5674 static ssize_t be_hwmon_show_temp(struct device *dev,
5675                                   struct device_attribute *dev_attr,
5676                                   char *buf)
5677 {
5678         struct be_adapter *adapter = dev_get_drvdata(dev);
5679
5680         /* Unit: millidegree Celsius */
5681         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5682                 return -EIO;
5683         else
5684                 return sprintf(buf, "%u\n",
5685                                adapter->hwmon_info.be_on_die_temp * 1000);
5686 }
5687
5688 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5689                           be_hwmon_show_temp, NULL, 1);
5690
5691 static struct attribute *be_hwmon_attrs[] = {
5692         &sensor_dev_attr_temp1_input.dev_attr.attr,
5693         NULL
5694 };
5695
5696 ATTRIBUTE_GROUPS(be_hwmon);
5697
5698 static char *mc_name(struct be_adapter *adapter)
5699 {
5700         char *str = ""; /* default */
5701
5702         switch (adapter->mc_type) {
5703         case UMC:
5704                 str = "UMC";
5705                 break;
5706         case FLEX10:
5707                 str = "FLEX10";
5708                 break;
5709         case vNIC1:
5710                 str = "vNIC-1";
5711                 break;
5712         case nPAR:
5713                 str = "nPAR";
5714                 break;
5715         case UFP:
5716                 str = "UFP";
5717                 break;
5718         case vNIC2:
5719                 str = "vNIC-2";
5720                 break;
5721         default:
5722                 str = "";
5723         }
5724
5725         return str;
5726 }
5727
5728 static inline char *func_name(struct be_adapter *adapter)
5729 {
5730         return be_physfn(adapter) ? "PF" : "VF";
5731 }
5732
5733 static inline char *nic_name(struct pci_dev *pdev)
5734 {
5735         switch (pdev->device) {
5736         case OC_DEVICE_ID1:
5737                 return OC_NAME;
5738         case OC_DEVICE_ID2:
5739                 return OC_NAME_BE;
5740         case OC_DEVICE_ID3:
5741         case OC_DEVICE_ID4:
5742                 return OC_NAME_LANCER;
5743         case BE_DEVICE_ID2:
5744                 return BE3_NAME;
5745         case OC_DEVICE_ID5:
5746         case OC_DEVICE_ID6:
5747                 return OC_NAME_SH;
5748         default:
5749                 return BE_NAME;
5750         }
5751 }
5752
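/* PCI probe: enable and map the device, set up the DMA mask, initialize
 * driver state (be_drv_init) and the adapter (be_setup), register the
 * netdev, attach RoCE and kick off error detection.  Each error label
 * below unwinds exactly the steps completed before the failure.
 */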
5753 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5754 {
5755         struct be_adapter *adapter;
5756         struct net_device *netdev;
5757         int status = 0;
5758
5759         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5760
5761         status = pci_enable_device(pdev);
5762         if (status)
5763                 goto do_none;
5764
5765         status = pci_request_regions(pdev, DRV_NAME);
5766         if (status)
5767                 goto disable_dev;
5768         pci_set_master(pdev);
5769
5770         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5771         if (!netdev) {
5772                 status = -ENOMEM;
5773                 goto rel_reg;
5774         }
5775         adapter = netdev_priv(netdev);
5776         adapter->pdev = pdev;
5777         pci_set_drvdata(pdev, adapter);
5778         adapter->netdev = netdev;
5779         SET_NETDEV_DEV(netdev, &pdev->dev);
5780
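        /* Prefer a 64-bit DMA mask (and advertise NETIF_F_HIGHDMA); fall
         * back to a 32-bit mask if the platform cannot provide one.
         */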
5781         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5782         if (!status) {
5783                 netdev->features |= NETIF_F_HIGHDMA;
5784         } else {
5785                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5786                 if (status) {
5787                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5788                         goto free_netdev;
5789                 }
5790         }
5791
5792         status = pci_enable_pcie_error_reporting(pdev);
5793         if (!status)
5794                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5795
5796         status = be_map_pci_bars(adapter);
5797         if (status)
5798                 goto free_netdev;
5799
5800         status = be_drv_init(adapter);
5801         if (status)
5802                 goto unmap_bars;
5803
5804         status = be_setup(adapter);
5805         if (status)
5806                 goto drv_cleanup;
5807
5808         be_netdev_init(netdev);
5809         status = register_netdev(netdev);
5810         if (status != 0)
5811                 goto unsetup;
5812
5813         be_roce_dev_add(adapter);
5814
5815         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5816         adapter->error_recovery.probe_time = jiffies;
5817
5818         /* On-die temperature is not supported on VFs. */
5819         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5820                 adapter->hwmon_info.hwmon_dev =
5821                         devm_hwmon_device_register_with_groups(&pdev->dev,
5822                                                                DRV_NAME,
5823                                                                adapter,
5824                                                                be_hwmon_groups);
5825                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5826         }
5827
5828         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5829                  func_name(adapter), mc_name(adapter), adapter->port_name);
5830
5831         return 0;
5832
5833 unsetup:
5834         be_clear(adapter);
5835 drv_cleanup:
5836         be_drv_cleanup(adapter);
5837 unmap_bars:
5838         be_unmap_pci_bars(adapter);
5839 free_netdev:
5840         free_netdev(netdev);
5841 rel_reg:
5842         pci_release_regions(pdev);
5843 disable_dev:
5844         pci_disable_device(pdev);
5845 do_none:
5846         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5847         return status;
5848 }
5849
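/* Legacy PCI power-management callbacks: be_suspend() quiesces the adapter
 * and puts the device into the requested low-power state; be_pci_resume()
 * re-enables the device, restores config space and restarts the adapter
 * along with its error-detection work.
 */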
5850 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5851 {
5852         struct be_adapter *adapter = pci_get_drvdata(pdev);
5853
5854         be_intr_set(adapter, false);
5855         be_cancel_err_detection(adapter);
5856
5857         be_cleanup(adapter);
5858
5859         pci_save_state(pdev);
5860         pci_disable_device(pdev);
5861         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5862         return 0;
5863 }
5864
5865 static int be_pci_resume(struct pci_dev *pdev)
5866 {
5867         struct be_adapter *adapter = pci_get_drvdata(pdev);
5868         int status = 0;
5869
5870         status = pci_enable_device(pdev);
5871         if (status)
5872                 return status;
5873
5874         pci_restore_state(pdev);
5875
5876         status = be_resume(adapter);
5877         if (status)
5878                 return status;
5879
5880         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5881
5882         return 0;
5883 }
5884
5885 /*
5886  * An FLR (function-level reset) will stop BE from DMAing any data.
5887  */
5888 static void be_shutdown(struct pci_dev *pdev)
5889 {
5890         struct be_adapter *adapter = pci_get_drvdata(pdev);
5891
5892         if (!adapter)
5893                 return;
5894
5895         be_roce_dev_shutdown(adapter);
5896         cancel_delayed_work_sync(&adapter->work);
5897         be_cancel_err_detection(adapter);
5898
5899         netif_device_detach(adapter->netdev);
5900
5901         be_cmd_reset_function(adapter);
5902
5903         pci_disable_device(pdev);
5904 }
5905
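/* PCI error (EEH/AER) recovery callbacks: error_detected() quiesces the
 * adapter and reports whether a slot reset is worth attempting,
 * slot_reset() re-enables the device and waits for the FW to become ready,
 * and resume() brings the adapter back up once the slot is usable again.
 */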
5906 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5907                                             pci_channel_state_t state)
5908 {
5909         struct be_adapter *adapter = pci_get_drvdata(pdev);
5910
5911         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5912
5913         be_roce_dev_remove(adapter);
5914
5915         if (!be_check_error(adapter, BE_ERROR_EEH)) {
5916                 be_set_error(adapter, BE_ERROR_EEH);
5917
5918                 be_cancel_err_detection(adapter);
5919
5920                 be_cleanup(adapter);
5921         }
5922
5923         if (state == pci_channel_io_perm_failure)
5924                 return PCI_ERS_RESULT_DISCONNECT;
5925
5926         pci_disable_device(pdev);
5927
5928         /* The error could cause the FW to trigger a flash debug dump.
5929          * Resetting the card while flash dump is in progress
5930          * can cause it not to recover; wait for it to finish.
5931          * Wait only on PCI function 0, since the wait is needed only once
5932          * per adapter.
5933          */
5934         if (pdev->devfn == 0)
5935                 ssleep(30);
5936
5937         return PCI_ERS_RESULT_NEED_RESET;
5938 }
5939
5940 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5941 {
5942         struct be_adapter *adapter = pci_get_drvdata(pdev);
5943         int status;
5944
5945         dev_info(&adapter->pdev->dev, "EEH reset\n");
5946
5947         status = pci_enable_device(pdev);
5948         if (status)
5949                 return PCI_ERS_RESULT_DISCONNECT;
5950
5951         pci_set_master(pdev);
5952         pci_restore_state(pdev);
5953
5954         /* Check if card is ok and fw is ready */
5955         dev_info(&adapter->pdev->dev,
5956                  "Waiting for FW to be ready after EEH reset\n");
5957         status = be_fw_wait_ready(adapter);
5958         if (status)
5959                 return PCI_ERS_RESULT_DISCONNECT;
5960
5961         pci_cleanup_aer_uncorrect_error_status(pdev);
5962         be_clear_error(adapter, BE_CLEAR_ALL);
5963         return PCI_ERS_RESULT_RECOVERED;
5964 }
5965
5966 static void be_eeh_resume(struct pci_dev *pdev)
5967 {
5968         int status = 0;
5969         struct be_adapter *adapter = pci_get_drvdata(pdev);
5970
5971         dev_info(&adapter->pdev->dev, "EEH resume\n");
5972
5973         pci_save_state(pdev);
5974
5975         status = be_resume(adapter);
5976         if (status)
5977                 goto err;
5978
5979         be_roce_dev_add(adapter);
5980
5981         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5982         return;
5983 err:
5984         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
5985 }
5986
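/* Called by the PCI core when "sriov_numvfs" is written in sysfs: enables
 * or disables num_vfs virtual functions and re-sizes the PF's own queues
 * to match the new resource split.
 */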
5987 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
5988 {
5989         struct be_adapter *adapter = pci_get_drvdata(pdev);
5990         struct be_resources vft_res = {0};
5991         int status;
5992
5993         if (!num_vfs)
5994                 be_vf_clear(adapter);
5995
5996         adapter->num_vfs = num_vfs;
5997
5998         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
5999                 dev_warn(&pdev->dev,
6000                          "Cannot disable VFs while they are assigned\n");
6001                 return -EBUSY;
6002         }
6003
6004         /* When the HW is in SRIOV capable configuration, the PF-pool resources
6005          * are equally distributed across the max-number of VFs. The user may
6006          * request only a subset of the max-vfs to be enabled.
6007          * Based on num_vfs, redistribute the resources across num_vfs so that
6008          * each VF has access to a larger share of resources.
6009          * This facility is not available in BE3 FW.
6010          * Also, this is done by FW in Lancer chip.
6011          */
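        /* Illustrative example (hypothetical numbers): with a PF pool of 64
         * RX queues and a maximum of 32 VFs, each VF would initially be
         * limited to 2 queues; enabling only 8 VFs lets the pool be
         * redistributed so that each of those VFs gets 8 queues instead.
         */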
6012         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6013                 be_calculate_vf_res(adapter, adapter->num_vfs,
6014                                     &vft_res);
6015                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6016                                                  adapter->num_vfs, &vft_res);
6017                 if (status)
6018                         dev_err(&pdev->dev,
6019                                 "Failed to optimize SR-IOV resources\n");
6020         }
6021
6022         status = be_get_resources(adapter);
6023         if (status)
6024                 return be_cmd_status(status);
6025
6026         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6027         rtnl_lock();
6028         status = be_update_queues(adapter);
6029         rtnl_unlock();
6030         if (status)
6031                 return be_cmd_status(status);
6032
6033         if (adapter->num_vfs)
6034                 status = be_vf_setup(adapter);
6035
6036         if (!status)
6037                 return adapter->num_vfs;
6038
6039         return 0;
6040 }
6041
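/* Hook the EEH/AER recovery callbacks above into the PCI core. */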
6042 static const struct pci_error_handlers be_eeh_handlers = {
6043         .error_detected = be_eeh_err_detected,
6044         .slot_reset = be_eeh_reset,
6045         .resume = be_eeh_resume,
6046 };
6047
6048 static struct pci_driver be_driver = {
6049         .name = DRV_NAME,
6050         .id_table = be_dev_ids,
6051         .probe = be_probe,
6052         .remove = be_remove,
6053         .suspend = be_suspend,
6054         .resume = be_pci_resume,
6055         .shutdown = be_shutdown,
6056         .sriov_configure = be_pci_sriov_configure,
6057         .err_handler = &be_eeh_handlers
6058 };
6059
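/* Module init: validate the rx_frag_size parameter, create the shared
 * command and error-recovery workqueues, then register the PCI driver.
 */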
6060 static int __init be_init_module(void)
6061 {
6062         int status;
6063
6064         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6065             rx_frag_size != 2048) {
6066                 printk(KERN_WARNING DRV_NAME
6067                         " : Module param rx_frag_size must be 2048/4096/8192."
6068                         " Using 2048\n");
6069                 rx_frag_size = 2048;
6070         }
6071
6072         if (num_vfs > 0) {
6073                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6074                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6075         }
6076
6077         be_wq = create_singlethread_workqueue("be_wq");
6078         if (!be_wq) {
6079                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6080                 return -ENOMEM;
6081         }
6082
6083         be_err_recovery_workq =
6084                 create_singlethread_workqueue("be_err_recover");
6085         if (!be_err_recovery_workq)
6086                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6087
6088         status = pci_register_driver(&be_driver);
6089         if (status) {
6090                 destroy_workqueue(be_wq);
6091                 be_destroy_err_recovery_workq();
6092         }
6093         return status;
6094 }
6095 module_init(be_init_module);
6096
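/* Module exit: unregister the PCI driver and destroy the shared workqueues. */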
6097 static void __exit be_exit_module(void)
6098 {
6099         pci_unregister_driver(&be_driver);
6100
6101         be_destroy_err_recovery_workq();
6102
6103         if (be_wq)
6104                 destroy_workqueue(be_wq);
6105 }
6106 module_exit(be_exit_module);