From a0c05a7c94137945b93dacc2dcd0f27d09539650 Mon Sep 17 00:00:00 2001 From: Philip Prindeville Date: Fri, 11 Aug 2017 18:59:52 -0600 Subject: [PATCH] kernel: fix receiver overflow for 82574L under load Under heavy load it's possible to overrun the 82574L. When this happens, Other Interrupt happens and that's erroneously interpreted as a Link Status Change. http://patchwork.ozlabs.org/patch/792260/ Signed-off-by: Philip Prindeville --- ...100e-avoid-receiver-interrupt-bursts.patch | 327 ++++++++++++++++++ 1 file changed, 327 insertions(+) create mode 100644 target/linux/generic/pending-4.9/190-e100e-avoid-receiver-interrupt-bursts.patch diff --git a/target/linux/generic/pending-4.9/190-e100e-avoid-receiver-interrupt-bursts.patch b/target/linux/generic/pending-4.9/190-e100e-avoid-receiver-interrupt-bursts.patch new file mode 100644 index 0000000000..f2966fafbb --- /dev/null +++ b/target/linux/generic/pending-4.9/190-e100e-avoid-receiver-interrupt-bursts.patch @@ -0,0 +1,327 @@ +Subject: [1/5] e1000e: Fix error path in link detection +From: Benjamin Poirier +X-Patchwork-Id: 792259 +Message-Id: <20170721183627.13373-1-bpoirier@suse.com> +To: Jeff Kirsher +Cc: netdev@vger.kernel.org, intel-wired-lan@lists.osuosl.org, + linux-kernel@vger.kernel.org, + Lennart Sorensen +Date: Fri, 21 Jul 2017 11:36:23 -0700 + +In case of error from e1e_rphy(), the loop will exit early and "success" +will be set to true erroneously. + +Signed-off-by: Benjamin Poirier +--- + drivers/net/ethernet/intel/e1000e/phy.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c +index d78d47b41a71..86ff0969efb6 100644 +--- a/drivers/net/ethernet/intel/e1000e/phy.c ++++ b/drivers/net/ethernet/intel/e1000e/phy.c +@@ -1744,6 +1744,7 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, + s32 ret_val = 0; + u16 i, phy_status; + ++ *success = false; + for (i = 0; i < iterations; i++) { + /* Some PHYs require the MII_BMSR register to be read + * twice due to the link bit being sticky. No harm doing +@@ -1763,16 +1764,16 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, + ret_val = e1e_rphy(hw, MII_BMSR, &phy_status); + if (ret_val) + break; +- if (phy_status & BMSR_LSTATUS) ++ if (phy_status & BMSR_LSTATUS) { ++ *success = true; + break; ++ } + if (usec_interval >= 1000) + msleep(usec_interval / 1000); + else + udelay(usec_interval); + } + +- *success = (i < iterations); +- + return ret_val; + } + + +Subject: [2/5] e1000e: Fix wrong comment related to link detection +From: Benjamin Poirier +X-Patchwork-Id: 792257 +Message-Id: <20170721183627.13373-2-bpoirier@suse.com> +To: Jeff Kirsher +Cc: netdev@vger.kernel.org, intel-wired-lan@lists.osuosl.org, + linux-kernel@vger.kernel.org, + Lennart Sorensen +Date: Fri, 21 Jul 2017 11:36:24 -0700 + +Reading e1000e_check_for_copper_link() shows that get_link_status is set to +false after link has been detected. Therefore, it stays TRUE until then. + +Signed-off-by: Benjamin Poirier +--- + drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c +index 2dcb5463d9b8..58a87134d2e5 100644 +--- a/drivers/net/ethernet/intel/e1000e/netdev.c ++++ b/drivers/net/ethernet/intel/e1000e/netdev.c +@@ -5074,7 +5074,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) + + /* get_link_status is set on LSC (link status) interrupt or + * Rx sequence error interrupt. get_link_status will stay +- * false until the check_for_link establishes link ++ * true until the check_for_link establishes link + * for copper adapters ONLY + */ + switch (hw->phy.media_type) { +@@ -5092,7 +5092,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) + break; + case e1000_media_type_internal_serdes: + ret_val = hw->mac.ops.check_for_link(hw); +- link_active = adapter->hw.mac.serdes_has_link; ++ link_active = hw->mac.serdes_has_link; + break; + default: + case e1000_media_type_unknown: + +Content-Transfer-Encoding: 7bit +Subject: [3/5] e1000e: Fix return value test +From: Benjamin Poirier +X-Patchwork-Id: 792258 +Message-Id: <20170721183627.13373-3-bpoirier@suse.com> +To: Jeff Kirsher +Cc: netdev@vger.kernel.org, intel-wired-lan@lists.osuosl.org, + linux-kernel@vger.kernel.org, + Lennart Sorensen +Date: Fri, 21 Jul 2017 11:36:25 -0700 + +All the helpers return -E1000_ERR_PHY. + +Signed-off-by: Benjamin Poirier +--- + drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c +index 58a87134d2e5..fc6a1d9999b2 100644 +--- a/drivers/net/ethernet/intel/e1000e/netdev.c ++++ b/drivers/net/ethernet/intel/e1000e/netdev.c +@@ -5099,7 +5099,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) + break; + } + +- if ((ret_val == E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) && ++ if ((ret_val == -E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) && + (er32(CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) { + /* See e1000_kmrn_lock_loss_workaround_ich8lan() */ + e_info("Gigabit has been disabled, downgrading speed\n"); + +Content-Transfer-Encoding: 7bit +Subject: [4/5] e1000e: Separate signaling for link check/link up +From: Benjamin Poirier +X-Patchwork-Id: 792262 +Message-Id: <20170721183627.13373-4-bpoirier@suse.com> +To: Jeff Kirsher +Cc: netdev@vger.kernel.org, intel-wired-lan@lists.osuosl.org, + linux-kernel@vger.kernel.org, + Lennart Sorensen +Date: Fri, 21 Jul 2017 11:36:26 -0700 + +Lennart reported the following race condition: + +\ e1000_watchdog_task + \ e1000e_has_link + \ hw->mac.ops.check_for_link() === e1000e_check_for_copper_link + /* link is up */ + mac->get_link_status = false; + + /* interrupt */ + \ e1000_msix_other + hw->mac.get_link_status = true; + + link_active = !hw->mac.get_link_status + /* link_active is false, wrongly */ + +This problem arises because the single flag get_link_status is used to +signal two different states: link status needs checking and link status is +down. + +Avoid the problem by using the return value of .check_for_link to signal +the link status to e1000e_has_link(). + +Reported-by: Lennart Sorensen +Signed-off-by: Benjamin Poirier +--- + drivers/net/ethernet/intel/e1000e/mac.c | 11 ++++++++--- + drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- + 2 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c +index b322011ec282..f457c5703d0c 100644 +--- a/drivers/net/ethernet/intel/e1000e/mac.c ++++ b/drivers/net/ethernet/intel/e1000e/mac.c +@@ -410,6 +410,9 @@ void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw) + * Checks to see of the link status of the hardware has changed. If a + * change in link status has been detected, then we read the PHY registers + * to get the current speed/duplex if link exists. ++ * ++ * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link ++ * up). + **/ + s32 e1000e_check_for_copper_link(struct e1000_hw *hw) + { +@@ -423,7 +426,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) + * Change or Rx Sequence Error interrupt. + */ + if (!mac->get_link_status) +- return 0; ++ return 1; + + /* First we want to see if the MII Status Register reports + * link. If so, then we want to get the current speed/duplex +@@ -461,10 +464,12 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) + * different link partner. + */ + ret_val = e1000e_config_fc_after_link_up(hw); +- if (ret_val) ++ if (ret_val) { + e_dbg("Error configuring flow control\n"); ++ return ret_val; ++ } + +- return ret_val; ++ return 1; + } + + /** +diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c +index fc6a1d9999b2..5a8ab1136566 100644 +--- a/drivers/net/ethernet/intel/e1000e/netdev.c ++++ b/drivers/net/ethernet/intel/e1000e/netdev.c +@@ -5081,7 +5081,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) + case e1000_media_type_copper: + if (hw->mac.get_link_status) { + ret_val = hw->mac.ops.check_for_link(hw); +- link_active = !hw->mac.get_link_status; ++ link_active = ret_val > 0; + } else { + link_active = true; + } + +Content-Transfer-Encoding: 7bit +Subject: [5/5] e1000e: Avoid receiver overrun interrupt bursts +From: Benjamin Poirier +X-Patchwork-Id: 792260 +Message-Id: <20170721183627.13373-5-bpoirier@suse.com> +To: Jeff Kirsher +Cc: netdev@vger.kernel.org, intel-wired-lan@lists.osuosl.org, + linux-kernel@vger.kernel.org, + Lennart Sorensen +Date: Fri, 21 Jul 2017 11:36:27 -0700 + +When e1000e_poll() is not fast enough to keep up with incoming traffic, the +adapter (when operating in msix mode) raises the Other interrupt to signal +Receiver Overrun. + +This is a double problem because 1) at the moment e1000_msix_other() +assumes that it is only called in case of Link Status Change and 2) if the +condition persists, the interrupt is repeatedly raised again in quick +succession. + +Ideally we would configure the Other interrupt to not be raised in case of +receiver overrun but this doesn't seem possible on this adapter. Instead, +we handle the first part of the problem by reverting to the practice of +reading ICR in the other interrupt handler, like before commit 16ecba59bc33 +("e1000e: Do not read ICR in Other interrupt"). Thanks to commit +0a8047ac68e5 ("e1000e: Fix msi-x interrupt automask") which cleared IAME +from CTRL_EXT, reading ICR doesn't interfere with RxQ0, TxQ0 interrupts +anymore. We handle the second part of the problem by not re-enabling the +Other interrupt right away when there is overrun. Instead, we wait until +traffic subsides, napi polling mode is exited and interrupts are +re-enabled. + +Reported-by: Lennart Sorensen +Fixes: 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt") +Signed-off-by: Benjamin Poirier +--- + drivers/net/ethernet/intel/e1000e/defines.h | 1 + + drivers/net/ethernet/intel/e1000e/netdev.c | 33 +++++++++++++++++++++++------ + 2 files changed, 27 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h +index 0641c0098738..afb7ebe20b24 100644 +--- a/drivers/net/ethernet/intel/e1000e/defines.h ++++ b/drivers/net/ethernet/intel/e1000e/defines.h +@@ -398,6 +398,7 @@ + #define E1000_ICR_LSC 0x00000004 /* Link Status Change */ + #define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */ + #define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */ ++#define E1000_ICR_RXO 0x00000040 /* Receiver Overrun */ + #define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ + #define E1000_ICR_ECCER 0x00400000 /* Uncorrectable ECC Error */ + /* If this bit asserted, the driver should claim the interrupt */ +diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c +index 5a8ab1136566..803edd1a6401 100644 +--- a/drivers/net/ethernet/intel/e1000e/netdev.c ++++ b/drivers/net/ethernet/intel/e1000e/netdev.c +@@ -1910,12 +1910,30 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) + struct net_device *netdev = data; + struct e1000_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; ++ u32 icr; ++ bool enable = true; ++ ++ icr = er32(ICR); ++ if (icr & E1000_ICR_RXO) { ++ ew32(ICR, E1000_ICR_RXO); ++ enable = false; ++ /* napi poll will re-enable Other, make sure it runs */ ++ if (napi_schedule_prep(&adapter->napi)) { ++ adapter->total_rx_bytes = 0; ++ adapter->total_rx_packets = 0; ++ __napi_schedule(&adapter->napi); ++ } ++ } ++ if (icr & E1000_ICR_LSC) { ++ ew32(ICR, E1000_ICR_LSC); ++ hw->mac.get_link_status = true; ++ /* guard against interrupt when we're going down */ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) { ++ mod_timer(&adapter->watchdog_timer, jiffies + 1); ++ } ++ } + +- hw->mac.get_link_status = true; +- +- /* guard against interrupt when we're going down */ +- if (!test_bit(__E1000_DOWN, &adapter->state)) { +- mod_timer(&adapter->watchdog_timer, jiffies + 1); ++ if (enable && !test_bit(__E1000_DOWN, &adapter->state)) { + ew32(IMS, E1000_IMS_OTHER); + } + +@@ -2687,7 +2705,8 @@ static int e1000e_poll(struct napi_struct *napi, int weight) + napi_complete_done(napi, work_done); + if (!test_bit(__E1000_DOWN, &adapter->state)) { + if (adapter->msix_entries) +- ew32(IMS, adapter->rx_ring->ims_val); ++ ew32(IMS, adapter->rx_ring->ims_val | ++ E1000_IMS_OTHER); + else + e1000_irq_enable(adapter); + } +@@ -4204,7 +4223,7 @@ static void e1000e_trigger_lsc(struct e1000_adapter *adapter) + struct e1000_hw *hw = &adapter->hw; + + if (adapter->msix_entries) +- ew32(ICS, E1000_ICS_OTHER); ++ ew32(ICS, E1000_ICS_LSC | E1000_ICS_OTHER); + else + ew32(ICS, E1000_ICS_LSC); + } -- 2.25.1