kernel: Fix arc kernel build
[oweals/openwrt.git] / target / linux / generic / backport-4.9 / 024-7-net-reorganize-struct-sock-for-better-data-locality.patch
1 From 9115e8cd2a0c6eaaa900c462721f12e1d45f326c Mon Sep 17 00:00:00 2001
2 From: Eric Dumazet <edumazet@google.com>
3 Date: Sat, 3 Dec 2016 11:14:56 -0800
4 Subject: [PATCH 07/10] net: reorganize struct sock for better data locality
5
6 Group fields used in TX path, and keep some cache lines mostly read
7 to permit sharing among cpus.
8
9 Gained two 4-byte holes on 64-bit arches.
10
11 Added a place holder for tcp tsq_flags, next to sk_wmem_alloc
12 to speed up tcp_wfree() in the following patch.
13
14 I have not added ____cacheline_aligned_in_smp, this might be done later.
15 I prefer doing this once inet and tcp/udp sockets reorg is also done.
16
17 Tested with both TCP and UDP.
18
19 UDP receiver performance under flood increased by ~20%:
20 Accessing sk_filter/sk_wq/sk_napi_id no longer stalls because sk_drops
21 was moved away from a critical cache line, now mostly read and shared.
22
23         /* --- cacheline 4 boundary (256 bytes) --- */
24         unsigned int               sk_napi_id;           /* 0x100   0x4 */
25         int                        sk_rcvbuf;            /* 0x104   0x4 */
26         struct sk_filter *         sk_filter;            /* 0x108   0x8 */
27         union {
28                 struct socket_wq * sk_wq;                /*         0x8 */
29                 struct socket_wq * sk_wq_raw;            /*         0x8 */
30         };                                               /* 0x110   0x8 */
31         struct xfrm_policy *       sk_policy[2];         /* 0x118  0x10 */
32         struct dst_entry *         sk_rx_dst;            /* 0x128   0x8 */
33         struct dst_entry *         sk_dst_cache;         /* 0x130   0x8 */
34         atomic_t                   sk_omem_alloc;        /* 0x138   0x4 */
35         int                        sk_sndbuf;            /* 0x13c   0x4 */
36         /* --- cacheline 5 boundary (320 bytes) --- */
37         int                        sk_wmem_queued;       /* 0x140   0x4 */
38         atomic_t                   sk_wmem_alloc;        /* 0x144   0x4 */
39         long unsigned int          sk_tsq_flags;         /* 0x148   0x8 */
40         struct sk_buff *           sk_send_head;         /* 0x150   0x8 */
41         struct sk_buff_head        sk_write_queue;       /* 0x158  0x18 */
42         __s32                      sk_peek_off;          /* 0x170   0x4 */
43         int                        sk_write_pending;     /* 0x174   0x4 */
44         long int                   sk_sndtimeo;          /* 0x178   0x8 */
45
46 Signed-off-by: Eric Dumazet <edumazet@google.com>
47 Tested-by: Paolo Abeni <pabeni@redhat.com>
48 Signed-off-by: David S. Miller <davem@davemloft.net>
49 ---
50  include/net/sock.h | 51 +++++++++++++++++++++++++++------------------------
51  1 file changed, 27 insertions(+), 24 deletions(-)
52
53 --- a/include/net/sock.h
54 +++ b/include/net/sock.h
55 @@ -344,6 +344,9 @@ struct sock {
56  #define sk_rxhash              __sk_common.skc_rxhash
57  
58         socket_lock_t           sk_lock;
59 +       atomic_t                sk_drops;
60 +       int                     sk_rcvlowat;
61 +       struct sk_buff_head     sk_error_queue;
62         struct sk_buff_head     sk_receive_queue;
63         /*
64          * The backlog queue is special, it is always used with
65 @@ -360,14 +363,13 @@ struct sock {
66                 struct sk_buff  *tail;
67         } sk_backlog;
68  #define sk_rmem_alloc sk_backlog.rmem_alloc
69 -       int                     sk_forward_alloc;
70  
71 -       __u32                   sk_txhash;
72 +       int                     sk_forward_alloc;
73  #ifdef CONFIG_NET_RX_BUSY_POLL
74 -       unsigned int            sk_napi_id;
75         unsigned int            sk_ll_usec;
76 +       /* ===== mostly read cache line ===== */
77 +       unsigned int            sk_napi_id;
78  #endif
79 -       atomic_t                sk_drops;
80         int                     sk_rcvbuf;
81  
82         struct sk_filter __rcu  *sk_filter;
83 @@ -380,11 +382,30 @@ struct sock {
84  #endif
85         struct dst_entry        *sk_rx_dst;
86         struct dst_entry __rcu  *sk_dst_cache;
87 -       /* Note: 32bit hole on 64bit arches */
88 -       atomic_t                sk_wmem_alloc;
89         atomic_t                sk_omem_alloc;
90         int                     sk_sndbuf;
91 +
92 +       /* ===== cache line for TX ===== */
93 +       int                     sk_wmem_queued;
94 +       atomic_t                sk_wmem_alloc;
95 +       unsigned long           sk_tsq_flags;
96 +       struct sk_buff          *sk_send_head;
97         struct sk_buff_head     sk_write_queue;
98 +       __s32                   sk_peek_off;
99 +       int                     sk_write_pending;
100 +       long                    sk_sndtimeo;
101 +       struct timer_list       sk_timer;
102 +       __u32                   sk_priority;
103 +       __u32                   sk_mark;
104 +       u32                     sk_pacing_rate; /* bytes per second */
105 +       u32                     sk_max_pacing_rate;
106 +       struct page_frag        sk_frag;
107 +       netdev_features_t       sk_route_caps;
108 +       netdev_features_t       sk_route_nocaps;
109 +       int                     sk_gso_type;
110 +       unsigned int            sk_gso_max_size;
111 +       gfp_t                   sk_allocation;
112 +       __u32                   sk_txhash;
113  
114         /*
115          * Because of non atomicity rules, all
116 @@ -400,31 +421,17 @@ struct sock {
117  #define SK_PROTOCOL_MAX U8_MAX
118         kmemcheck_bitfield_end(flags);
119  
120 -       int                     sk_wmem_queued;
121 -       gfp_t                   sk_allocation;
122 -       u32                     sk_pacing_rate; /* bytes per second */
123 -       u32                     sk_max_pacing_rate;
124 -       netdev_features_t       sk_route_caps;
125 -       netdev_features_t       sk_route_nocaps;
126 -       int                     sk_gso_type;
127 -       unsigned int            sk_gso_max_size;
128         u16                     sk_gso_max_segs;
129 -       int                     sk_rcvlowat;
130         unsigned long           sk_lingertime;
131 -       struct sk_buff_head     sk_error_queue;
132         struct proto            *sk_prot_creator;
133         rwlock_t                sk_callback_lock;
134         int                     sk_err,
135                                 sk_err_soft;
136         u32                     sk_ack_backlog;
137         u32                     sk_max_ack_backlog;
138 -       __u32                   sk_priority;
139 -       __u32                   sk_mark;
140         struct pid              *sk_peer_pid;
141         const struct cred       *sk_peer_cred;
142         long                    sk_rcvtimeo;
143 -       long                    sk_sndtimeo;
144 -       struct timer_list       sk_timer;
145         ktime_t                 sk_stamp;
146  #if BITS_PER_LONG==32
147         seqlock_t               sk_stamp_seq;
148 @@ -434,10 +441,6 @@ struct sock {
149         u32                     sk_tskey;
150         struct socket           *sk_socket;
151         void                    *sk_user_data;
152 -       struct page_frag        sk_frag;
153 -       struct sk_buff          *sk_send_head;
154 -       __s32                   sk_peek_off;
155 -       int                     sk_write_pending;
156  #ifdef CONFIG_SECURITY
157         void                    *sk_security;
158  #endif