--- /dev/null
+From: Manikanta Pubbisetty <mpubbise@codeaurora.org>
+Date: Wed, 11 Jul 2018 00:12:53 +0530
+Subject: [PATCH] mac80211: add stop/start logic for software TXQs
+
+Sometimes, it is required to stop the transmissions momentarily and
+resume it later; stopping the txqs becomes very critical in scenarios where
+the packet transmission has to be ceased completely. For example, during
+the hardware restart, during off channel operations,
+when initiating CSA (upon detecting a radar on the DFS channel), etc.
+
+The TX queue stop/start logic in mac80211 works well in stopping the TX
+when drivers make use of netdev queues, i.e., when Qdiscs in network layer
+take care of traffic scheduling. Since the devices implementing
+wake_tx_queue can run without Qdiscs, packets will be handed to mac80211
+directly without queueing them in the netdev queues.
+
+Also, mac80211 does not invoke any of the
+netif_stop_*/netif_wake_* APIs if wake_tx_queue is implemented.
+Since the queues are not stopped in this case, transmissions can continue
+and this will impact negatively on the operation of the wireless device.
+
+For example,
+During hardware restart, we stop the netdev queues so that packets are
+not sent to the driver. Since ath10k implements wake_tx_queue,
+TX queues will not be stopped and packets might reach the hardware while
+it is restarting; this can make hardware unresponsive and the only
+possible option for recovery is to reboot the entire system.
+
+There is another problem with this: it was observed that packets
+were sent on the DFS channel for a prolonged duration after radar
+detection, impacting the channel closing time.
+
+We can still invoke netif stop/wake APIs when wake_tx_queue is implemented
+but this could lead to packet drops in network layer; adding stop/start
+logic for software TXQs in mac80211 instead makes more sense; the change
+proposed adds the same in mac80211.
+
+Signed-off-by: Manikanta Pubbisetty <mpubbise@codeaurora.org>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+---
+
+--- a/include/net/mac80211.h
++++ b/include/net/mac80211.h
+@@ -1453,6 +1453,8 @@ enum ieee80211_vif_flags {
+ * @drv_priv: data area for driver use, will always be aligned to
+ * sizeof(void \*).
+ * @txq: the multicast data TX queue (if driver uses the TXQ abstraction)
++ * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped,
++ * protected by fq->lock.
+ */
+ struct ieee80211_vif {
+ enum nl80211_iftype type;
+@@ -1477,6 +1479,8 @@ struct ieee80211_vif {
+
+ unsigned int probe_req_reg;
+
++ bool txqs_stopped[IEEE80211_NUM_ACS];
++
+ /* must be last */
+ u8 drv_priv[0] __aligned(sizeof(void *));
+ };
+--- a/net/mac80211/ieee80211_i.h
++++ b/net/mac80211/ieee80211_i.h
+@@ -816,6 +816,7 @@ enum txq_info_flags {
+ IEEE80211_TXQ_STOP,
+ IEEE80211_TXQ_AMPDU,
+ IEEE80211_TXQ_NO_AMSDU,
++ IEEE80211_TXQ_STOP_NETIF_TX,
+ };
+
+ /**
+@@ -1223,6 +1224,7 @@ struct ieee80211_local {
+
+ struct sk_buff_head pending[IEEE80211_MAX_QUEUES];
+ struct tasklet_struct tx_pending_tasklet;
++ struct tasklet_struct wake_txqs_tasklet;
+
+ atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES];
+
+@@ -2037,6 +2039,7 @@ void ieee80211_txq_purge(struct ieee8021
+ struct txq_info *txqi);
+ void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
++void ieee80211_wake_txqs(unsigned long data);
+ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
+ u16 transaction, u16 auth_alg, u16 status,
+ const u8 *extra, size_t extra_len, const u8 *bssid,
+--- a/net/mac80211/main.c
++++ b/net/mac80211/main.c
+@@ -671,6 +671,10 @@ struct ieee80211_hw *ieee80211_alloc_hw_
+ tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending,
+ (unsigned long)local);
+
++ if (ops->wake_tx_queue)
++ tasklet_init(&local->wake_txqs_tasklet, ieee80211_wake_txqs,
++ (unsigned long)local);
++
+ tasklet_init(&local->tasklet,
+ ieee80211_tasklet_handler,
+ (unsigned long) local);
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -3463,13 +3463,19 @@ struct sk_buff *ieee80211_tx_dequeue(str
+ struct ieee80211_tx_info *info;
+ struct ieee80211_tx_data tx;
+ ieee80211_tx_result r;
+- struct ieee80211_vif *vif;
++ struct ieee80211_vif *vif = txq->vif;
+
+ spin_lock_bh(&fq->lock);
+
+- if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
++ if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ||
++ test_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags))
+ goto out;
+
++ if (vif->txqs_stopped[ieee80211_ac_from_tid(txq->tid)]) {
++ set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags);
++ goto out;
++ }
++
+ /* Make sure fragments stay together. */
+ skb = __skb_dequeue(&txqi->frags);
+ if (skb)
+@@ -3565,6 +3571,7 @@ begin:
+ }
+
+ IEEE80211_SKB_CB(skb)->control.vif = vif;
++
+ out:
+ spin_unlock_bh(&fq->lock);
+
+--- a/net/mac80211/util.c
++++ b/net/mac80211/util.c
+@@ -239,6 +239,99 @@ __le16 ieee80211_ctstoself_duration(stru
+ }
+ EXPORT_SYMBOL(ieee80211_ctstoself_duration);
+
++static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
++{ /* Clear the stop flag for @ac on @sdata and wake its TXQs that were marked stopped. */
++ struct ieee80211_local *local = sdata->local;
++ struct ieee80211_vif *vif = &sdata->vif;
++ struct fq *fq = &local->fq;
++ struct ps_data *ps = NULL; /* stays NULL unless the interface is an AP */
++ struct txq_info *txqi;
++ struct sta_info *sta;
++ int i;
++
++ spin_lock_bh(&fq->lock); /* txqs_stopped[] is documented as protected by fq->lock */
++
++ if (sdata->vif.type == NL80211_IFTYPE_AP)
++ ps = &sdata->bss->ps;
++
++ sdata->vif.txqs_stopped[ac] = false; /* ieee80211_tx_dequeue() tests this flag before dequeuing */
++
++ list_for_each_entry_rcu(sta, &local->sta_list, list) {
++ if (sdata != sta->sdata) /* station belongs to another interface */
++ continue;
++
++ for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
++ struct ieee80211_txq *txq = sta->sta.txq[i];
++
++ txqi = to_txq_info(txq);
++
++ if (ac != txq->ac) /* TXQ serves a different AC */
++ continue;
++
++ if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX,
++ &txqi->flags)) /* skip TXQs that were not marked stopped */
++ continue;
++
++ spin_unlock_bh(&fq->lock); /* fq->lock is not held across the driver call */
++ drv_wake_tx_queue(local, txqi);
++ spin_lock_bh(&fq->lock);
++ }
++ }
++
++ if (!vif->txq) /* vif has no multicast TXQ */
++ goto out;
++
++ txqi = to_txq_info(vif->txq);
++
++ if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags) ||
++ (ps && atomic_read(&ps->num_sta_ps)) || ac != vif->txq->ac) /* flag is already cleared when the ps/ac tests run */
++ goto out; /* NOTE(review): presumably holds multicast back while stations are in powersave - confirm */
++
++ spin_unlock_bh(&fq->lock);
++
++ drv_wake_tx_queue(local, txqi);
++ return; /* lock was released before the driver call */
++out:
++ spin_unlock_bh(&fq->lock);
++}
++
++void ieee80211_wake_txqs(unsigned long data)
++{ /* Tasklet handler: for each hw queue without stop reasons, wake the TXQs of every interface AC mapped to it. */
++ struct ieee80211_local *local = (struct ieee80211_local *)data; /* @data is the pointer passed to tasklet_init() */
++ struct ieee80211_sub_if_data *sdata;
++ int n_acs = IEEE80211_NUM_ACS;
++ unsigned long flags;
++ int i;
++
++ rcu_read_lock(); /* covers the RCU list walks here and in __ieee80211_wake_txqs() */
++ spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
++
++ if (local->hw.queues < IEEE80211_NUM_ACS) /* fewer hw queues than ACs: only AC 0 is considered */
++ n_acs = 1;
++
++ for (i = 0; i < local->hw.queues; i++) {
++ if (local->queue_stop_reasons[i]) /* queue still has a stop reason set */
++ continue;
++
++ spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); /* not held while waking TXQs */
++ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
++ int ac;
++
++ for (ac = 0; ac < n_acs; ac++) {
++ int ac_queue = sdata->vif.hw_queue[ac];
++
++ if (ac_queue == i || /* AC maps to this hw queue ... */
++ sdata->vif.cab_queue == i) /* ... or this is the vif's cab_queue */
++ __ieee80211_wake_txqs(sdata, ac);
++ }
++ }
++ spin_lock_irqsave(&local->queue_stop_reason_lock, flags); /* retaken before the next queue_stop_reasons read */
++ }
++
++ spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
++ rcu_read_unlock();
++}
++
+ void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue)
+ {
+ struct ieee80211_sub_if_data *sdata;
+@@ -307,6 +400,9 @@ static void __ieee80211_wake_queue(struc
+ rcu_read_unlock();
+ } else
+ tasklet_schedule(&local->tx_pending_tasklet);
++
++ if (local->ops->wake_tx_queue)
++ tasklet_schedule(&local->wake_txqs_tasklet);
+ }
+
+ void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
+@@ -350,9 +446,6 @@ static void __ieee80211_stop_queue(struc
+ if (__test_and_set_bit(reason, &local->queue_stop_reasons[queue]))
+ return;
+
+- if (local->ops->wake_tx_queue)
+- return;
+-
+ if (local->hw.queues < IEEE80211_NUM_ACS)
+ n_acs = 1;
+
+@@ -365,8 +458,15 @@ static void __ieee80211_stop_queue(struc
+
+ for (ac = 0; ac < n_acs; ac++) {
+ if (sdata->vif.hw_queue[ac] == queue ||
+- sdata->vif.cab_queue == queue)
+- netif_stop_subqueue(sdata->dev, ac);
++ sdata->vif.cab_queue == queue) {
++ if (!local->ops->wake_tx_queue) {
++ netif_stop_subqueue(sdata->dev, ac);
++ continue;
++ }
++ spin_lock(&local->fq.lock);
++ sdata->vif.txqs_stopped[ac] = true;
++ spin_unlock(&local->fq.lock);
++ }
+ }
+ }
+ rcu_read_unlock();