v1.5 branch refresh based upon upstream master @ c8677ca89e53e3be7988d54280fce166cc894a7e
[librecmc/librecmc.git] / target / linux / generic / backport-4.14 / 353-v4.18-netfilter-nf_flow_table-move-ipv4-offload-hook-code-.patch
1 From: Felix Fietkau <nbd@nbd.name>
2 Date: Sat, 17 Feb 2018 11:49:44 +0100
3 Subject: [PATCH] netfilter: nf_flow_table: move ipv4 offload hook code to
4  nf_flow_table
5
6 Allows some minor code sharing with the ipv6 hook code and is also
7 useful as preparation for adding iptables support for offload.
8
9 Signed-off-by: Felix Fietkau <nbd@nbd.name>
10 ---
11  create mode 100644 net/netfilter/nf_flow_table_ip.c
12
13 --- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
14 +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
15 @@ -2,248 +2,8 @@
16  #include <linux/init.h>
17  #include <linux/module.h>
18  #include <linux/netfilter.h>
19 -#include <linux/rhashtable.h>
20 -#include <linux/ip.h>
21 -#include <linux/netdevice.h>
22 -#include <net/ip.h>
23 -#include <net/neighbour.h>
24  #include <net/netfilter/nf_flow_table.h>
25  #include <net/netfilter/nf_tables.h>
26 -/* For layer 4 checksum field offset. */
27 -#include <linux/tcp.h>
28 -#include <linux/udp.h>
29 -
30 -static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
31 -                             __be32 addr, __be32 new_addr)
32 -{
33 -       struct tcphdr *tcph;
34 -
35 -       if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
36 -           skb_try_make_writable(skb, thoff + sizeof(*tcph)))
37 -               return -1;
38 -
39 -       tcph = (void *)(skb_network_header(skb) + thoff);
40 -       inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
41 -
42 -       return 0;
43 -}
44 -
45 -static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
46 -                             __be32 addr, __be32 new_addr)
47 -{
48 -       struct udphdr *udph;
49 -
50 -       if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
51 -           skb_try_make_writable(skb, thoff + sizeof(*udph)))
52 -               return -1;
53 -
54 -       udph = (void *)(skb_network_header(skb) + thoff);
55 -       if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
56 -               inet_proto_csum_replace4(&udph->check, skb, addr,
57 -                                        new_addr, true);
58 -               if (!udph->check)
59 -                       udph->check = CSUM_MANGLED_0;
60 -       }
61 -
62 -       return 0;
63 -}
64 -
65 -static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
66 -                                 unsigned int thoff, __be32 addr,
67 -                                 __be32 new_addr)
68 -{
69 -       switch (iph->protocol) {
70 -       case IPPROTO_TCP:
71 -               if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
72 -                       return NF_DROP;
73 -               break;
74 -       case IPPROTO_UDP:
75 -               if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
76 -                       return NF_DROP;
77 -               break;
78 -       }
79 -
80 -       return 0;
81 -}
82 -
83 -static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
84 -                          struct iphdr *iph, unsigned int thoff,
85 -                          enum flow_offload_tuple_dir dir)
86 -{
87 -       __be32 addr, new_addr;
88 -
89 -       switch (dir) {
90 -       case FLOW_OFFLOAD_DIR_ORIGINAL:
91 -               addr = iph->saddr;
92 -               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
93 -               iph->saddr = new_addr;
94 -               break;
95 -       case FLOW_OFFLOAD_DIR_REPLY:
96 -               addr = iph->daddr;
97 -               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
98 -               iph->daddr = new_addr;
99 -               break;
100 -       default:
101 -               return -1;
102 -       }
103 -       csum_replace4(&iph->check, addr, new_addr);
104 -
105 -       return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
106 -}
107 -
108 -static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
109 -                          struct iphdr *iph, unsigned int thoff,
110 -                          enum flow_offload_tuple_dir dir)
111 -{
112 -       __be32 addr, new_addr;
113 -
114 -       switch (dir) {
115 -       case FLOW_OFFLOAD_DIR_ORIGINAL:
116 -               addr = iph->daddr;
117 -               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
118 -               iph->daddr = new_addr;
119 -               break;
120 -       case FLOW_OFFLOAD_DIR_REPLY:
121 -               addr = iph->saddr;
122 -               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
123 -               iph->saddr = new_addr;
124 -               break;
125 -       default:
126 -               return -1;
127 -       }
128 -
129 -       return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
130 -}
131 -
132 -static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
133 -                         enum flow_offload_tuple_dir dir)
134 -{
135 -       struct iphdr *iph = ip_hdr(skb);
136 -       unsigned int thoff = iph->ihl * 4;
137 -
138 -       if (flow->flags & FLOW_OFFLOAD_SNAT &&
139 -           (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
140 -            nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
141 -               return -1;
142 -       if (flow->flags & FLOW_OFFLOAD_DNAT &&
143 -           (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
144 -            nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
145 -               return -1;
146 -
147 -       return 0;
148 -}
149 -
150 -static bool ip_has_options(unsigned int thoff)
151 -{
152 -       return thoff != sizeof(struct iphdr);
153 -}
154 -
155 -static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
156 -                           struct flow_offload_tuple *tuple)
157 -{
158 -       struct flow_ports *ports;
159 -       unsigned int thoff;
160 -       struct iphdr *iph;
161 -
162 -       if (!pskb_may_pull(skb, sizeof(*iph)))
163 -               return -1;
164 -
165 -       iph = ip_hdr(skb);
166 -       thoff = iph->ihl * 4;
167 -
168 -       if (ip_is_fragment(iph) ||
169 -           unlikely(ip_has_options(thoff)))
170 -               return -1;
171 -
172 -       if (iph->protocol != IPPROTO_TCP &&
173 -           iph->protocol != IPPROTO_UDP)
174 -               return -1;
175 -
176 -       thoff = iph->ihl * 4;
177 -       if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
178 -               return -1;
179 -
180 -       ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
181 -
182 -       tuple->src_v4.s_addr    = iph->saddr;
183 -       tuple->dst_v4.s_addr    = iph->daddr;
184 -       tuple->src_port         = ports->source;
185 -       tuple->dst_port         = ports->dest;
186 -       tuple->l3proto          = AF_INET;
187 -       tuple->l4proto          = iph->protocol;
188 -       tuple->iifidx           = dev->ifindex;
189 -
190 -       return 0;
191 -}
192 -
193 -/* Based on ip_exceeds_mtu(). */
194 -static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
195 -{
196 -       if (skb->len <= mtu)
197 -               return false;
198 -
199 -       if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
200 -               return false;
201 -
202 -       if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
203 -               return false;
204 -
205 -       return true;
206 -}
207 -
208 -unsigned int
209 -nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
210 -                       const struct nf_hook_state *state)
211 -{
212 -       struct flow_offload_tuple_rhash *tuplehash;
213 -       struct nf_flowtable *flow_table = priv;
214 -       struct flow_offload_tuple tuple = {};
215 -       enum flow_offload_tuple_dir dir;
216 -       struct flow_offload *flow;
217 -       struct net_device *outdev;
218 -       const struct rtable *rt;
219 -       struct iphdr *iph;
220 -       __be32 nexthop;
221 -
222 -       if (skb->protocol != htons(ETH_P_IP))
223 -               return NF_ACCEPT;
224 -
225 -       if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
226 -               return NF_ACCEPT;
227 -
228 -       tuplehash = flow_offload_lookup(flow_table, &tuple);
229 -       if (tuplehash == NULL)
230 -               return NF_ACCEPT;
231 -
232 -       outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
233 -       if (!outdev)
234 -               return NF_ACCEPT;
235 -
236 -       dir = tuplehash->tuple.dir;
237 -       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
238 -       rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
239 -
240 -       if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
241 -               return NF_ACCEPT;
242 -
243 -       if (skb_try_make_writable(skb, sizeof(*iph)))
244 -               return NF_DROP;
245 -
246 -       if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
247 -           nf_flow_nat_ip(flow, skb, dir) < 0)
248 -               return NF_DROP;
249 -
250 -       flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
251 -       iph = ip_hdr(skb);
252 -       ip_decrease_ttl(iph);
253 -
254 -       skb->dev = outdev;
255 -       nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
256 -       neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
257 -
258 -       return NF_STOLEN;
259 -}
260 -EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
261  
262  static struct nf_flowtable_type flowtable_ipv4 = {
263         .family         = NFPROTO_IPV4,
264 --- a/net/netfilter/Makefile
265 +++ b/net/netfilter/Makefile
266 @@ -113,7 +113,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV)        += nft_fwd_
267  
268  # flow table infrastructure
269  obj-$(CONFIG_NF_FLOW_TABLE)    += nf_flow_table.o
270 -nf_flow_table-objs := nf_flow_table_core.o
271 +nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
272  
273  obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
274  
275 --- /dev/null
276 +++ b/net/netfilter/nf_flow_table_ip.c
277 @@ -0,0 +1,245 @@
278 +#include <linux/kernel.h>
279 +#include <linux/init.h>
280 +#include <linux/module.h>
281 +#include <linux/netfilter.h>
282 +#include <linux/rhashtable.h>
283 +#include <linux/ip.h>
284 +#include <linux/netdevice.h>
285 +#include <net/ip.h>
286 +#include <net/neighbour.h>
287 +#include <net/netfilter/nf_flow_table.h>
288 +/* For layer 4 checksum field offset. */
289 +#include <linux/tcp.h>
290 +#include <linux/udp.h>
291 +
292 +static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
293 +                             __be32 addr, __be32 new_addr)
294 +{
295 +       struct tcphdr *tcph;
296 +       /* Adjust tcph->check for addr -> new_addr; 0 on success, -1 on failure. */
297 +       if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
298 +           skb_try_make_writable(skb, thoff + sizeof(*tcph)))
299 +               return -1;
300 +       /* thoff is an offset from the network header. */
301 +       tcph = (void *)(skb_network_header(skb) + thoff);
302 +       inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
303 +
304 +       return 0;
305 +}
306 +
307 +static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
308 +                             __be32 addr, __be32 new_addr)
309 +{
310 +       struct udphdr *udph;
311 +       /* Adjust udph->check for addr -> new_addr; 0 on success, -1 on failure. */
312 +       if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
313 +           skb_try_make_writable(skb, thoff + sizeof(*udph)))
314 +               return -1;
315 +       /* A zero checksum is left alone unless ip_summed is CHECKSUM_PARTIAL. */
316 +       udph = (void *)(skb_network_header(skb) + thoff);
317 +       if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
318 +               inet_proto_csum_replace4(&udph->check, skb, addr,
319 +                                        new_addr, true);
320 +               if (!udph->check)       /* an updated checksum is never stored as 0 */
321 +                       udph->check = CSUM_MANGLED_0;
322 +       }
323 +
324 +       return 0;
325 +}
326 +
327 +static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
328 +                                 unsigned int thoff, __be32 addr,
329 +                                 __be32 new_addr)
330 +{
331 +       switch (iph->protocol) {        /* pick the L4 checksum fix-up helper */
332 +       case IPPROTO_TCP:
333 +               if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
334 +                       return -1;      /* not NF_DROP: it is 0, callers test < 0 */
335 +               break;
336 +       case IPPROTO_UDP:
337 +               if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
338 +                       return -1;      /* not NF_DROP: it is 0, callers test < 0 */
339 +               break;
340 +       }
341 +       /* 0: helper succeeded, or the protocol is neither TCP nor UDP. */
342 +       return 0;
343 +}
344 +
345 +static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
346 +                          struct iphdr *iph, unsigned int thoff,
347 +                          enum flow_offload_tuple_dir dir)
348 +{
349 +       __be32 addr, new_addr;
350 +       /* SNAT address rewrite; -1 on unknown dir, else the l4proto result. */
351 +       switch (dir) {
352 +       case FLOW_OFFLOAD_DIR_ORIGINAL:         /* rewrite the source address */
353 +               addr = iph->saddr;
354 +               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
355 +               iph->saddr = new_addr;
356 +               break;
357 +       case FLOW_OFFLOAD_DIR_REPLY:            /* rewrite the destination address */
358 +               addr = iph->daddr;
359 +               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
360 +               iph->daddr = new_addr;
361 +               break;
362 +       default:
363 +               return -1;
364 +       }
365 +       csum_replace4(&iph->check, addr, new_addr);
366 +       /* The same old/new address pair is passed on for the L4 checksum. */
367 +       return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
368 +}
369 +
370 +static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
371 +                          struct iphdr *iph, unsigned int thoff,
372 +                          enum flow_offload_tuple_dir dir)
373 +{
374 +       __be32 addr, new_addr;
375 +       /* DNAT address rewrite; -1 on unknown dir, else the l4proto result. */
376 +       switch (dir) {
377 +       case FLOW_OFFLOAD_DIR_ORIGINAL:         /* rewrite the destination address */
378 +               addr = iph->daddr;
379 +               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
380 +               iph->daddr = new_addr;
381 +               break;
382 +       case FLOW_OFFLOAD_DIR_REPLY:            /* rewrite the source address */
383 +               addr = iph->saddr;
384 +               new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
385 +               iph->saddr = new_addr;
386 +               break;
387 +       default:
388 +               return -1;
389 +       }
390 +       csum_replace4(&iph->check, addr, new_addr);     /* keep the IPv4 header checksum valid */
391 +       return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
392 +}
393 +
394 +static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
395 +                         enum flow_offload_tuple_dir dir)
396 +{
397 +       unsigned int thoff = ip_hdr(skb)->ihl * 4;
398 +       u8 proto = ip_hdr(skb)->protocol;
399 +       /* ip_hdr() is re-read per call: the L4 helpers pull/unshare the skb. */
400 +       if (flow->flags & FLOW_OFFLOAD_SNAT &&
401 +           (nf_flow_snat_port(flow, skb, thoff, proto, dir) < 0 ||
402 +            nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
403 +               return -1;
404 +       if (flow->flags & FLOW_OFFLOAD_DNAT &&
405 +           (nf_flow_dnat_port(flow, skb, thoff, proto, dir) < 0 ||
406 +            nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
407 +               return -1;
408 +
409 +       return 0;       /* -1 above means one of the rewrite steps failed */
410 +}
411 +
412 +static bool ip_has_options(unsigned int thoff)        /* thoff: header length in bytes */
413 +{
414 +       return thoff != sizeof(struct iphdr);   /* differs from the bare header size */
415 +}
416 +
417 +static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
418 +                           struct flow_offload_tuple *tuple)
419 +{
420 +       struct flow_ports *ports;
421 +       unsigned int thoff;
422 +       struct iphdr *iph;
423 +       /* Fill *tuple from skb; returns 0, or -1 when the packet is rejected. */
424 +       if (!pskb_may_pull(skb, sizeof(*iph)))
425 +               return -1;
426 +
427 +       iph = ip_hdr(skb);
428 +       thoff = iph->ihl * 4;
429 +
430 +       if (ip_is_fragment(iph) ||
431 +           unlikely(ip_has_options(thoff)))
432 +               return -1;
433 +       /* Only TCP and UDP are accepted; their ports are read below. */
434 +       if (iph->protocol != IPPROTO_TCP &&
435 +           iph->protocol != IPPROTO_UDP)
436 +               return -1;
437 +
438 +       if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
439 +               return -1;
440 +       /* pskb_may_pull() may reallocate the head: reload iph before use. */
441 +       iph = ip_hdr(skb);
442 +       ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
443 +
444 +       tuple->src_v4.s_addr    = iph->saddr;
445 +       tuple->dst_v4.s_addr    = iph->daddr;
446 +       tuple->src_port         = ports->source;
447 +       tuple->dst_port         = ports->dest;
448 +       tuple->l3proto          = AF_INET;
449 +       tuple->l4proto          = iph->protocol;
450 +       tuple->iifidx           = dev->ifindex;
451 +
452 +       return 0;
453 +}
454 +
455 +/* Based on ip_exceeds_mtu(). Returns true only when all checks below fail. */
456 +static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
457 +{
458 +       if (skb->len <= mtu)
459 +               return false;
460 +       /* Without the DF bit set the packet is not reported as exceeding. */
461 +       if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
462 +               return false;
463 +       /* GSO packets pass when skb_gso_validate_mtu() accepts them. */
464 +       if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
465 +               return false;
466 +
467 +       return true;
468 +}
469 +
470 +unsigned int
471 +nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
472 +                       const struct nf_hook_state *state)
473 +{
474 +       struct flow_offload_tuple_rhash *tuplehash;
475 +       struct nf_flowtable *flow_table = priv;
476 +       struct flow_offload_tuple tuple = {};
477 +       enum flow_offload_tuple_dir dir;
478 +       struct flow_offload *flow;
479 +       struct net_device *outdev;
480 +       const struct rtable *rt;
481 +       struct iphdr *iph;
482 +       __be32 nexthop;
483 +       /* NF_ACCEPT: not handled here; NF_DROP: rewrite failed; NF_STOLEN: sent. */
484 +       if (skb->protocol != htons(ETH_P_IP))
485 +               return NF_ACCEPT;
486 +       /* No tuple could be extracted from the packet: not handled here. */
487 +       if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
488 +               return NF_ACCEPT;
489 +       /* Only packets matching an entry in flow_table are handled. */
490 +       tuplehash = flow_offload_lookup(flow_table, &tuple);
491 +       if (tuplehash == NULL)
492 +               return NF_ACCEPT;
493 +
494 +       outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
495 +       if (!outdev)
496 +               return NF_ACCEPT;
497 +
498 +       dir = tuplehash->tuple.dir;
499 +       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
500 +       rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
501 +       /* Packets failing the MTU check are not handled here. */
502 +       if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
503 +               return NF_ACCEPT;
504 +
505 +       if (skb_try_make_writable(skb, sizeof(*iph)))
506 +               return NF_DROP;
507 +
508 +       if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
509 +           nf_flow_nat_ip(flow, skb, dir) < 0)
510 +               return NF_DROP;
511 +       /* Push the flow timeout forward and decrement the TTL for this hop. */
512 +       flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
513 +       iph = ip_hdr(skb);
514 +       ip_decrease_ttl(iph);
515 +       /* Send via outdev; next hop comes from the route in tuple.dst_cache. */
516 +       skb->dev = outdev;
517 +       nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
518 +       neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
519 +
520 +       return NF_STOLEN;
521 +}
522 +EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);