ff8aafb2553982ee400c280c1ddb402de0344f97
[oweals/openwrt.git] /
1 From: Pablo Neira Ayuso <pablo@netfilter.org>
2 Date: Sun, 7 Jan 2018 01:04:07 +0100
3 Subject: [PATCH] netfilter: nf_tables: add flow table netlink frontend
4
5 This patch introduces a netlink control plane to create, delete and dump
6 flow tables. Flow tables are identified by name, this name is used from
7 rules to refer to a specific flow table. Flow tables use the rhashtable
8 class and a generic garbage collector to remove expired entries.
9
10 This also adds the infrastructure to add different flow table types, so
11 we can add one for each layer 3 protocol family.
12
13 Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
14 ---
15  create mode 100644 include/net/netfilter/nf_flow_table.h
16
17 --- /dev/null
18 +++ b/include/net/netfilter/nf_flow_table.h
19 @@ -0,0 +1,23 @@
20 +#ifndef _NF_FLOW_TABLE_H
21 +#define _NF_FLOW_TABLE_H
22 +
23 +#include <linux/rhashtable.h>
24 +
25 +struct nf_flowtable;
26 +
27 +struct nf_flowtable_type {
28 +       struct list_head                list;
29 +       int                             family;
30 +       void                            (*gc)(struct work_struct *work);
31 +       const struct rhashtable_params  *params;
32 +       nf_hookfn                       *hook;
33 +       struct module                   *owner;
34 +};
35 +
36 +struct nf_flowtable {
37 +       struct rhashtable               rhashtable;
38 +       const struct nf_flowtable_type  *type;
39 +       struct delayed_work             gc_work;
40 +};
41 +
42 +#endif /* _NF_FLOW_TABLE_H */
43 --- a/include/net/netfilter/nf_tables.h
44 +++ b/include/net/netfilter/nf_tables.h
45 @@ -9,6 +9,7 @@
46  #include <linux/netfilter/x_tables.h>
47  #include <linux/netfilter/nf_tables.h>
48  #include <linux/u64_stats_sync.h>
49 +#include <net/netfilter/nf_flow_table.h>
50  #include <net/netlink.h>
51  
52  #define NFT_JUMP_STACK_SIZE    16
53 @@ -938,6 +939,7 @@ unsigned int nft_do_chain(struct nft_pkt
54   *     @chains: chains in the table
55   *     @sets: sets in the table
56   *     @objects: stateful objects in the table
57 + *     @flowtables: flow tables in the table
58   *     @hgenerator: handle generator state
59   *     @use: number of chain references to this table
60   *     @flags: table flag (see enum nft_table_flags)
61 @@ -949,6 +951,7 @@ struct nft_table {
62         struct list_head                chains;
63         struct list_head                sets;
64         struct list_head                objects;
65 +       struct list_head                flowtables;
66         u64                             hgenerator;
67         u32                             use;
68         u16                             flags:14,
69 @@ -1080,6 +1083,44 @@ int nft_register_obj(struct nft_object_t
70  void nft_unregister_obj(struct nft_object_type *obj_type);
71  
72  /**
73 + *     struct nft_flowtable - nf_tables flow table
74 + *
75 + *     @list: flow table list node in table list
76 + *     @table: the table the flow table is contained in
77 + *     @name: name of this flow table
78 + *     @hooknum: hook number
79 + *     @priority: hook priority
80 + *     @ops_len: number of hooks in array
81 + *     @genmask: generation mask
82 + *     @use: number of references to this flow table
83 + *     @data: rhashtable and garbage collector
84 + *     @ops: array of hooks
85 + */
86 +struct nft_flowtable {
87 +       struct list_head                list;
88 +       struct nft_table                *table;
89 +       char                            *name;
90 +       int                             hooknum;
91 +       int                             priority;
92 +       int                             ops_len;
93 +       u32                             genmask:2,
94 +                                       use:30;
95 +       /* runtime data below here */
96 +       struct nf_hook_ops              *ops ____cacheline_aligned;
97 +       struct nf_flowtable             data;
98 +};
99 +
100 +struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
101 +                                                const struct nlattr *nla,
102 +                                                u8 genmask);
103 +void nft_flow_table_iterate(struct net *net,
104 +                           void (*iter)(struct nf_flowtable *flowtable, void *data),
105 +                           void *data);
106 +
107 +void nft_register_flowtable_type(struct nf_flowtable_type *type);
108 +void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
109 +
110 +/**
111   *     struct nft_traceinfo - nft tracing information and state
112   *
113   *     @pkt: pktinfo currently processed
114 @@ -1315,4 +1356,11 @@ struct nft_trans_obj {
115  #define nft_trans_obj(trans)   \
116         (((struct nft_trans_obj *)trans->data)->obj)
117  
118 +struct nft_trans_flowtable {
119 +       struct nft_flowtable            *flowtable;
120 +};
121 +
122 +#define nft_trans_flowtable(trans)     \
123 +       (((struct nft_trans_flowtable *)trans->data)->flowtable)
124 +
125  #endif /* _NET_NF_TABLES_H */
126 --- a/include/uapi/linux/netfilter/nf_tables.h
127 +++ b/include/uapi/linux/netfilter/nf_tables.h
128 @@ -92,6 +92,9 @@ enum nft_verdicts {
129   * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes)
130   * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes)
131   * @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes)
132 + * @NFT_MSG_NEWFLOWTABLE: add new flow table (enum nft_flowtable_attributes)
133 + * @NFT_MSG_GETFLOWTABLE: get flow table (enum nft_flowtable_attributes)
134 + * @NFT_MSG_DELFLOWTABLE: delete flow table (enum nft_flowtable_attributes)
135   */
136  enum nf_tables_msg_types {
137         NFT_MSG_NEWTABLE,
138 @@ -116,6 +119,9 @@ enum nf_tables_msg_types {
139         NFT_MSG_GETOBJ,
140         NFT_MSG_DELOBJ,
141         NFT_MSG_GETOBJ_RESET,
142 +       NFT_MSG_NEWFLOWTABLE,
143 +       NFT_MSG_GETFLOWTABLE,
144 +       NFT_MSG_DELFLOWTABLE,
145         NFT_MSG_MAX,
146  };
147  
148 @@ -1310,6 +1316,53 @@ enum nft_object_attributes {
149  #define NFTA_OBJ_MAX           (__NFTA_OBJ_MAX - 1)
150  
151  /**
152 + * enum nft_flowtable_attributes - nf_tables flow table netlink attributes
153 + *
154 + * @NFTA_FLOWTABLE_TABLE: name of the table containing the flow table (NLA_STRING)
155 + * @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING)
156 + * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration (NLA_NESTED)
157 + * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32)
158 + */
159 +enum nft_flowtable_attributes {
160 +       NFTA_FLOWTABLE_UNSPEC,
161 +       NFTA_FLOWTABLE_TABLE,
162 +       NFTA_FLOWTABLE_NAME,
163 +       NFTA_FLOWTABLE_HOOK,
164 +       NFTA_FLOWTABLE_USE,
165 +       __NFTA_FLOWTABLE_MAX
166 +};
167 +#define NFTA_FLOWTABLE_MAX     (__NFTA_FLOWTABLE_MAX - 1)
168 +
169 +/**
170 + * enum nft_flowtable_hook_attributes - nf_tables flow table hook netlink attributes
171 + *
172 + * @NFTA_FLOWTABLE_HOOK_NUM: netfilter hook number (NLA_U32)
173 + * @NFTA_FLOWTABLE_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
174 + * @NFTA_FLOWTABLE_HOOK_DEVS: input devices this flow table is bound to (NLA_NESTED)
175 + */
176 +enum nft_flowtable_hook_attributes {
177 +       NFTA_FLOWTABLE_HOOK_UNSPEC,
178 +       NFTA_FLOWTABLE_HOOK_NUM,
179 +       NFTA_FLOWTABLE_HOOK_PRIORITY,
180 +       NFTA_FLOWTABLE_HOOK_DEVS,
181 +       __NFTA_FLOWTABLE_HOOK_MAX
182 +};
183 +#define NFTA_FLOWTABLE_HOOK_MAX        (__NFTA_FLOWTABLE_HOOK_MAX - 1)
184 +
185 +/**
186 + * enum nft_devices_attributes - nf_tables device netlink attributes
187 + *
188 + * @NFTA_DEVICE_NAME: name of this device (NLA_STRING)
189 + */
190 +enum nft_devices_attributes {
191 +       NFTA_DEVICE_UNSPEC,
192 +       NFTA_DEVICE_NAME,
193 +       __NFTA_DEVICE_MAX
194 +};
195 +#define NFTA_DEVICE_MAX                (__NFTA_DEVICE_MAX - 1)
196 +
197 +
198 +/**
199   * enum nft_trace_attributes - nf_tables trace netlink attributes
200   *
201   * @NFTA_TRACE_TABLE: name of the table (NLA_STRING)
202 --- a/net/netfilter/nf_tables_api.c
203 +++ b/net/netfilter/nf_tables_api.c
204 @@ -17,6 +17,7 @@
205  #include <linux/netfilter.h>
206  #include <linux/netfilter/nfnetlink.h>
207  #include <linux/netfilter/nf_tables.h>
208 +#include <net/netfilter/nf_flow_table.h>
209  #include <net/netfilter/nf_tables_core.h>
210  #include <net/netfilter/nf_tables.h>
211  #include <net/net_namespace.h>
212 @@ -24,6 +25,7 @@
213  
214  static LIST_HEAD(nf_tables_expressions);
215  static LIST_HEAD(nf_tables_objects);
216 +static LIST_HEAD(nf_tables_flowtables);
217  
218  /**
219   *     nft_register_afinfo - register nf_tables address family info
220 @@ -389,6 +391,40 @@ static int nft_delobj(struct nft_ctx *ct
221         return err;
222  }
223  
224 +static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
225 +                                  struct nft_flowtable *flowtable)
226 +{
227 +       struct nft_trans *trans;
228 +
229 +       trans = nft_trans_alloc(ctx, msg_type,
230 +                               sizeof(struct nft_trans_flowtable));
231 +       if (trans == NULL)
232 +               return -ENOMEM;
233 +
234 +       if (msg_type == NFT_MSG_NEWFLOWTABLE)
235 +               nft_activate_next(ctx->net, flowtable);
236 +
237 +       nft_trans_flowtable(trans) = flowtable;
238 +       list_add_tail(&trans->list, &ctx->net->nft.commit_list);
239 +
240 +       return 0;
241 +}
242 +
243 +static int nft_delflowtable(struct nft_ctx *ctx,
244 +                           struct nft_flowtable *flowtable)
245 +{
246 +       int err;
247 +
248 +       err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
249 +       if (err < 0)
250 +               return err;
251 +
252 +       nft_deactivate_next(ctx->net, flowtable);
253 +       ctx->table->use--;
254 +
255 +       return err;
256 +}
257 +
258  /*
259   * Tables
260   */
261 @@ -772,6 +808,7 @@ static int nf_tables_newtable(struct net
262         INIT_LIST_HEAD(&table->chains);
263         INIT_LIST_HEAD(&table->sets);
264         INIT_LIST_HEAD(&table->objects);
265 +       INIT_LIST_HEAD(&table->flowtables);
266         table->flags = flags;
267  
268         nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
269 @@ -793,10 +830,11 @@ err1:
270  
271  static int nft_flush_table(struct nft_ctx *ctx)
272  {
273 -       int err;
274 +       struct nft_flowtable *flowtable, *nft;
275         struct nft_chain *chain, *nc;
276         struct nft_object *obj, *ne;
277         struct nft_set *set, *ns;
278 +       int err;
279  
280         list_for_each_entry(chain, &ctx->table->chains, list) {
281                 if (!nft_is_active_next(ctx->net, chain))
282 @@ -822,6 +860,12 @@ static int nft_flush_table(struct nft_ct
283                         goto out;
284         }
285  
286 +       list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) {
287 +               err = nft_delflowtable(ctx, flowtable);
288 +               if (err < 0)
289 +                       goto out;
290 +       }
291 +
292         list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) {
293                 err = nft_delobj(ctx, obj);
294                 if (err < 0)
295 @@ -4853,6 +4897,605 @@ static void nf_tables_obj_notify(const s
296                        ctx->afi->family, ctx->report, GFP_KERNEL);
297  }
298  
299 +/*
300 + * Flow tables
301 + */
302 +void nft_register_flowtable_type(struct nf_flowtable_type *type)
303 +{
304 +       nfnl_lock(NFNL_SUBSYS_NFTABLES);
305 +       list_add_tail_rcu(&type->list, &nf_tables_flowtables);
306 +       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
307 +}
308 +EXPORT_SYMBOL_GPL(nft_register_flowtable_type);
309 +
310 +void nft_unregister_flowtable_type(struct nf_flowtable_type *type)
311 +{
312 +       nfnl_lock(NFNL_SUBSYS_NFTABLES);
313 +       list_del_rcu(&type->list);
314 +       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
315 +}
316 +EXPORT_SYMBOL_GPL(nft_unregister_flowtable_type);
317 +
318 +static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
319 +       [NFTA_FLOWTABLE_TABLE]          = { .type = NLA_STRING,
320 +                                           .len = NFT_NAME_MAXLEN - 1 },
321 +       [NFTA_FLOWTABLE_NAME]           = { .type = NLA_STRING,
322 +                                           .len = NFT_NAME_MAXLEN - 1 },
323 +       [NFTA_FLOWTABLE_HOOK]           = { .type = NLA_NESTED },
324 +};
325 +
326 +struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
327 +                                                const struct nlattr *nla,
328 +                                                u8 genmask)
329 +{
330 +       struct nft_flowtable *flowtable;
331 +
332 +       list_for_each_entry(flowtable, &table->flowtables, list) {
333 +               if (!nla_strcmp(nla, flowtable->name) &&
334 +                   nft_active_genmask(flowtable, genmask))
335 +                       return flowtable;
336 +       }
337 +       return ERR_PTR(-ENOENT);
338 +}
339 +EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
340 +
341 +#define NFT_FLOWTABLE_DEVICE_MAX       8
342 +
343 +static int nf_tables_parse_devices(const struct nft_ctx *ctx,
344 +                                  const struct nlattr *attr,
345 +                                  struct net_device *dev_array[], int *len)
346 +{
347 +       const struct nlattr *tmp;
348 +       struct net_device *dev;
349 +       char ifname[IFNAMSIZ];
350 +       int rem, n = 0, err;
351 +
352 +       nla_for_each_nested(tmp, attr, rem) {
353 +               if (nla_type(tmp) != NFTA_DEVICE_NAME) {
354 +                       err = -EINVAL;
355 +                       goto err1;
356 +               }
357 +
358 +               nla_strlcpy(ifname, tmp, IFNAMSIZ);
359 +               dev = dev_get_by_name(ctx->net, ifname); /* caller does dev_put() */
360 +               if (!dev) {
361 +                       err = -ENOENT;
362 +                       goto err1;
363 +               }
364 +
365 +               dev_array[n++] = dev;
366 +               if (n == NFT_FLOWTABLE_DEVICE_MAX) {
367 +                       err = -EFBIG;
368 +                       goto err1;
369 +               }
370 +       }
371 +       if (!n) /* empty device list; was "!len", a pointer that is never NULL */
372 +               return -EINVAL;
373 +
374 +       err = 0;
375 +err1:
376 +       *len = n;       /* entries stored in dev_array[], on success and on error */
377 +       return err;
378 +}
379 +
380 +static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
381 +       [NFTA_FLOWTABLE_HOOK_NUM]       = { .type = NLA_U32 },
382 +       [NFTA_FLOWTABLE_HOOK_PRIORITY]  = { .type = NLA_U32 },
383 +       [NFTA_FLOWTABLE_HOOK_DEVS]      = { .type = NLA_NESTED },
384 +};
385 +
386 +static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
387 +                                         const struct nlattr *attr,
388 +                                         struct nft_flowtable *flowtable)
389 +{
390 +       struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX];
391 +       struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
392 +       struct nf_hook_ops *ops;
393 +       int hooknum, priority;
394 +       int err, n = 0, i;
395 +
396 +       err = nla_parse_nested(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
397 +                              nft_flowtable_hook_policy, NULL);
398 +       if (err < 0)
399 +               return err;
400 +
401 +       if (!tb[NFTA_FLOWTABLE_HOOK_NUM] ||
402 +           !tb[NFTA_FLOWTABLE_HOOK_PRIORITY] ||
403 +           !tb[NFTA_FLOWTABLE_HOOK_DEVS])
404 +               return -EINVAL;
405 +
406 +       hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
407 +       if (hooknum >= ctx->afi->nhooks)
408 +               return -EINVAL;
409 +       priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
410 +
411 +       err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
412 +                                     dev_array, &n);
413 +       if (err < 0)
414 +               goto err1;
415 +
416 +       ops = kcalloc(n, sizeof(*ops), GFP_KERNEL);     /* one hook per device */
417 +       if (!ops) {
418 +               err = -ENOMEM;
419 +               goto err1;
420 +       }
421 +
422 +       flowtable->hooknum      = hooknum;      /* reported back by the dump code */
423 +       flowtable->priority     = priority;     /* reported back by the dump code */
424 +       flowtable->ops          = ops;
425 +       flowtable->ops_len      = n;
426 +
427 +       for (i = 0; i < n; i++) {
428 +               flowtable->ops[i].pf            = NFPROTO_NETDEV;
429 +               flowtable->ops[i].hooknum       = hooknum;
430 +               flowtable->ops[i].priority      = priority;
431 +               flowtable->ops[i].priv          = &flowtable->data.rhashtable;
432 +               flowtable->ops[i].hook          = flowtable->data.type->hook;
433 +               flowtable->ops[i].dev           = dev_array[i];
434 +       }
435 +       err = 0;
436 +err1:
437 +       for (i = 0; i < n; i++) /* n counts the devices parse_devices stored */
438 +               dev_put(dev_array[i]);
439 +
440 +       return err;
441 +}
442 +
443 +static const struct nf_flowtable_type *
444 +__nft_flowtable_type_get(const struct nft_af_info *afi)
445 +{
446 +       const struct nf_flowtable_type *type;
447 +
448 +       list_for_each_entry(type, &nf_tables_flowtables, list) {
449 +               if (afi->family == type->family)
450 +                       return type;
451 +       }
452 +       return NULL;
453 +}
454 +
455 +static const struct nf_flowtable_type *
456 +nft_flowtable_type_get(const struct nft_af_info *afi)
457 +{
458 +       const struct nf_flowtable_type *type;
459 +
460 +       type = __nft_flowtable_type_get(afi);
461 +       if (type != NULL && try_module_get(type->owner))
462 +               return type;
463 +
464 +#ifdef CONFIG_MODULES
465 +       if (type == NULL) {
466 +               nfnl_unlock(NFNL_SUBSYS_NFTABLES);
467 +               request_module("nf-flowtable-%u", afi->family);
468 +               nfnl_lock(NFNL_SUBSYS_NFTABLES);
469 +               if (__nft_flowtable_type_get(afi))
470 +                       return ERR_PTR(-EAGAIN);
471 +       }
472 +#endif
473 +       return ERR_PTR(-ENOENT);
474 +}
475 +
476 +void nft_flow_table_iterate(struct net *net,
477 +                           void (*iter)(struct nf_flowtable *flowtable, void *data),
478 +                           void *data)
479 +{
480 +       struct nft_flowtable *flowtable;
481 +       const struct nft_af_info *afi;
482 +       const struct nft_table *table;
483 +
484 +       rcu_read_lock();
485 +       list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
486 +               list_for_each_entry_rcu(table, &afi->tables, list) {
487 +                       list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
488 +                               iter(&flowtable->data, data);
489 +                       }
490 +               }
491 +       }
492 +       rcu_read_unlock();
493 +}
494 +EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
495 +
496 +static void nft_unregister_flowtable_net_hooks(struct net *net,
497 +                                              struct nft_flowtable *flowtable)
498 +{
499 +       int i;
500 +
501 +       for (i = 0; i < flowtable->ops_len; i++) {
502 +               if (!flowtable->ops[i].dev)
503 +                       continue;
504 +
505 +               nf_unregister_net_hook(net, &flowtable->ops[i]);
506 +       }
507 +}
508 +
509 +static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
510 +                                 struct sk_buff *skb,
511 +                                 const struct nlmsghdr *nlh,
512 +                                 const struct nlattr * const nla[],
513 +                                 struct netlink_ext_ack *extack)
514 +{
515 +       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
516 +       const struct nf_flowtable_type *type;
517 +       u8 genmask = nft_genmask_next(net);
518 +       int family = nfmsg->nfgen_family;
519 +       struct nft_flowtable *flowtable;
520 +       struct nft_af_info *afi;
521 +       struct nft_table *table;
522 +       struct nft_ctx ctx;
523 +       int err, i, k;
524 +
525 +       if (!nla[NFTA_FLOWTABLE_TABLE] ||
526 +           !nla[NFTA_FLOWTABLE_NAME] ||
527 +           !nla[NFTA_FLOWTABLE_HOOK])
528 +               return -EINVAL;
529 +
530 +       afi = nf_tables_afinfo_lookup(net, family, true);
531 +       if (IS_ERR(afi))
532 +               return PTR_ERR(afi);
533 +
534 +       table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
535 +       if (IS_ERR(table))
536 +               return PTR_ERR(table);
537 +
538 +       flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
539 +                                              genmask);
540 +       if (IS_ERR(flowtable)) {
541 +               err = PTR_ERR(flowtable);
542 +               if (err != -ENOENT)
543 +                       return err;
544 +       } else {
545 +               if (nlh->nlmsg_flags & NLM_F_EXCL)
546 +                       return -EEXIST;
547 +
548 +               return 0;       /* already exists and NLM_F_EXCL not set */
549 +       }
550 +
551 +       nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
552 +
553 +       flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
554 +       if (!flowtable)
555 +               return -ENOMEM;
556 +
557 +       flowtable->table = table;
558 +       flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
559 +       if (!flowtable->name) {
560 +               err = -ENOMEM;
561 +               goto err1;
562 +       }
563 +
564 +       type = nft_flowtable_type_get(afi);     /* holds a module reference */
565 +       if (IS_ERR(type)) {
566 +               err = PTR_ERR(type);
567 +               goto err2;
568 +       }
569 +
570 +       flowtable->data.type = type;
571 +       err = rhashtable_init(&flowtable->data.rhashtable, type->params);
572 +       if (err < 0)
573 +               goto err3;
574 +
575 +       err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
576 +                                            flowtable);
577 +       if (err < 0)
578 +               goto err4;      /* ops not allocated, rhashtable is */
579 +
580 +       for (i = 0; i < flowtable->ops_len; i++) {
581 +               err = nf_register_net_hook(net, &flowtable->ops[i]);
582 +               if (err < 0)
583 +                       goto err5;      /* hooks 0..i-1 are registered */
584 +       }
585 +
586 +       err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
587 +       if (err < 0)
588 +               goto err6;      /* every hook is registered */
589 +
590 +       INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc);
591 +       queue_delayed_work(system_power_efficient_wq,
592 +                          &flowtable->data.gc_work, HZ);
593 +
594 +       list_add_tail_rcu(&flowtable->list, &table->flowtables);
595 +       table->use++;
596 +       return 0;
597 +err6:
598 +       i = flowtable->ops_len;
599 +err5:
600 +       for (k = i - 1; k >= 0; k--)    /* unwind only what was registered */
601 +               nf_unregister_net_hook(net, &flowtable->ops[k]);
602 +       kfree(flowtable->ops);
603 +err4:
604 +       rhashtable_destroy(&flowtable->data.rhashtable);
605 +err3:
606 +       module_put(type->owner);
607 +err2:
608 +       kfree(flowtable->name);
609 +err1:
610 +       kfree(flowtable);
611 +       return err;
612 +}
613 +
614 +static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
615 +                                 struct sk_buff *skb,
616 +                                 const struct nlmsghdr *nlh,
617 +                                 const struct nlattr * const nla[],
618 +                                 struct netlink_ext_ack *extack)
619 +{
620 +       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
621 +       u8 genmask = nft_genmask_next(net);
622 +       int family = nfmsg->nfgen_family;
623 +       struct nft_flowtable *flowtable;
624 +       struct nft_af_info *afi;
625 +       struct nft_table *table;
626 +       struct nft_ctx ctx;
627 +
628 +       if (!nla[NFTA_FLOWTABLE_TABLE] ||
629 +           !nla[NFTA_FLOWTABLE_NAME])  /* name goes straight into nla_strcmp() */
630 +               return -EINVAL;
631 +
632 +       afi = nf_tables_afinfo_lookup(net, family, true);
633 +       if (IS_ERR(afi))
634 +               return PTR_ERR(afi);
635 +       table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
636 +       if (IS_ERR(table))
637 +               return PTR_ERR(table);
638 +       flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
639 +                                              genmask);
640 +       if (IS_ERR(flowtable))
641 +               return PTR_ERR(flowtable);
642 +       if (flowtable->use > 0)         /* still referenced */
643 +               return -EBUSY;
644 +       nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
645 +       return nft_delflowtable(&ctx, flowtable);
646 +}
647 +
648 +static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
649 +                                        u32 portid, u32 seq, int event,
650 +                                        u32 flags, int family,
651 +                                        struct nft_flowtable *flowtable)
652 +{
653 +       struct nlattr *nest, *nest_devs;
654 +       struct nfgenmsg *nfmsg;
655 +       struct nlmsghdr *nlh;
656 +       int i;
657 +
658 +       event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
659 +       nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
660 +       if (nlh == NULL)
661 +               goto nla_put_failure;
662 +
663 +       nfmsg = nlmsg_data(nlh);
664 +       nfmsg->nfgen_family     = family;
665 +       nfmsg->version          = NFNETLINK_V0;
666 +       nfmsg->res_id           = htons(net->nft.base_seq & 0xffff);
667 +
668 +       if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
669 +           nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
670 +           nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)))
671 +               goto nla_put_failure;
672 +
673 +       nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
674 +       if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
675 +           nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
676 +               goto nla_put_failure;
677 +
678 +       nest_devs = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK_DEVS);
679 +       if (!nest_devs)
680 +               goto nla_put_failure;
681 +
682 +       for (i = 0; i < flowtable->ops_len; i++) {
683 +               if (flowtable->ops[i].dev &&
684 +                   nla_put_string(skb, NFTA_DEVICE_NAME,
685 +                                  flowtable->ops[i].dev->name))
686 +                       goto nla_put_failure;
687 +       }
688 +       nla_nest_end(skb, nest_devs);
689 +       nla_nest_end(skb, nest);
690 +
691 +       nlmsg_end(skb, nlh);
692 +       return 0;
693 +
694 +nla_put_failure:
695 +       nlmsg_trim(skb, nlh);
696 +       return -1;
697 +}
698 +
699 +struct nft_flowtable_filter {
700 +       char            *table;
701 +};
702 +
703 +static int nf_tables_dump_flowtable(struct sk_buff *skb,
704 +                                   struct netlink_callback *cb)
705 +{
706 +       const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
707 +       struct nft_flowtable_filter *filter = cb->data;
708 +       unsigned int idx = 0, s_idx = cb->args[0];
709 +       struct net *net = sock_net(skb->sk);
710 +       int family = nfmsg->nfgen_family;
711 +       struct nft_flowtable *flowtable;
712 +       const struct nft_af_info *afi;
713 +       const struct nft_table *table;
714 +
715 +       rcu_read_lock();
716 +       cb->seq = net->nft.base_seq;
717 +
718 +       list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
719 +               if (family != NFPROTO_UNSPEC && family != afi->family)
720 +                       continue;
721 +
722 +               list_for_each_entry_rcu(table, &afi->tables, list) {
723 +                       list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
724 +                               if (!nft_is_active(net, flowtable))
725 +                                       goto cont;
726 +                               if (idx < s_idx)
727 +                                       goto cont;
728 +                               if (idx > s_idx)
729 +                                       memset(&cb->args[1], 0,
730 +                                              sizeof(cb->args) - sizeof(cb->args[0]));
731 +                               if (filter && filter->table[0] &&
732 +                                   strcmp(filter->table, table->name))
733 +                                       goto cont;
734 +
735 +                               if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid,
736 +                                                                 cb->nlh->nlmsg_seq,
737 +                                                                 NFT_MSG_NEWFLOWTABLE,
738 +                                                                 NLM_F_MULTI | NLM_F_APPEND,
739 +                                                                 afi->family, flowtable) < 0)
740 +                                       goto done;
741 +
742 +                               nl_dump_check_consistent(cb, nlmsg_hdr(skb));
743 +cont:
744 +                               idx++;
745 +                       }
746 +               }
747 +       }
748 +done:
749 +       rcu_read_unlock();
750 +
751 +       cb->args[0] = idx;
752 +       return skb->len;
753 +}
754 +
755 +static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
756 +{
757 +       struct nft_flowtable_filter *filter = cb->data;
758 +
759 +       if (!filter)
760 +               return 0;
761 +
762 +       kfree(filter->table);
763 +       kfree(filter);
764 +
765 +       return 0;
766 +}
767 +
768 +static struct nft_flowtable_filter *
769 +nft_flowtable_filter_alloc(const struct nlattr * const nla[])
770 +{
771 +       struct nft_flowtable_filter *filter;
772 +
773 +       filter = kzalloc(sizeof(*filter), GFP_KERNEL);
774 +       if (!filter)
775 +               return ERR_PTR(-ENOMEM);
776 +
777 +       if (nla[NFTA_FLOWTABLE_TABLE]) {
778 +               filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE],
779 +                                          GFP_KERNEL);
780 +               if (!filter->table) {
781 +                       kfree(filter);
782 +                       return ERR_PTR(-ENOMEM);
783 +               }
784 +       }
785 +       return filter;
786 +}
787 +
788 +static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
789 +                                 struct sk_buff *skb,
790 +                                 const struct nlmsghdr *nlh,
791 +                                 const struct nlattr * const nla[],
792 +                                 struct netlink_ext_ack *extack)
793 +{
794 +       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
795 +       u8 genmask = nft_genmask_cur(net);
796 +       int family = nfmsg->nfgen_family;
797 +       struct nft_flowtable *flowtable;
798 +       const struct nft_af_info *afi;
799 +       const struct nft_table *table;
800 +       struct sk_buff *skb2;
801 +       int err;
802 +
803 +       if (nlh->nlmsg_flags & NLM_F_DUMP) {
804 +               struct netlink_dump_control c = {
805 +                       .dump = nf_tables_dump_flowtable,
806 +                       .done = nf_tables_dump_flowtable_done,
807 +               };
808 +
809 +               if (nla[NFTA_FLOWTABLE_TABLE]) {
810 +                       struct nft_flowtable_filter *filter;
811 +
812 +                       filter = nft_flowtable_filter_alloc(nla);
813 +                       if (IS_ERR(filter))
814 +                               return -ENOMEM;
815 +
816 +                       c.data = filter;        /* freed by the .done callback */
817 +               }
818 +               return netlink_dump_start(nlsk, skb, nlh, &c);
819 +       }
820 +
821 +       if (!nla[NFTA_FLOWTABLE_NAME])
822 +               return -EINVAL;
823 +
824 +       afi = nf_tables_afinfo_lookup(net, family, false);
825 +       if (IS_ERR(afi))
826 +               return PTR_ERR(afi);
827 +
828 +       table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
829 +       if (IS_ERR(table))
830 +               return PTR_ERR(table);
831 +
832 +       flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
833 +                                              genmask);
834 +       if (IS_ERR(flowtable))  /* was IS_ERR(table): lookup failures slipped by */
835 +               return PTR_ERR(flowtable);
836 +
837 +       skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
838 +       if (!skb2)
839 +               return -ENOMEM;
840 +
841 +       err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid,
842 +                                           nlh->nlmsg_seq,
843 +                                           NFT_MSG_NEWFLOWTABLE, 0, family,
844 +                                           flowtable);
845 +       if (err < 0)
846 +               goto err;
847 +
848 +       return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
849 +err:
850 +       kfree_skb(skb2);
851 +       return err;
852 +}
853 +
854 +static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
855 +                                      struct nft_flowtable *flowtable,
856 +                                      int event)
857 +{      /* Build an @event message for @flowtable and pass it to nfnetlink_send() */
858 +       struct sk_buff *skb;
859 +       int err;
860 +
861 +       if (!ctx->report &&     /* skip only if no echo asked and no listeners */
862 +           !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
863 +               return;
864 +
865 +       skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
866 +       if (skb == NULL)
867 +               goto err;
868 +
869 +       err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
870 +                                           ctx->seq, event, 0,
871 +                                           ctx->afi->family, flowtable);
872 +       if (err < 0) {
873 +               kfree_skb(skb);
874 +               goto err;
875 +       }
876 +
877 +       nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
878 +                      ctx->report, GFP_KERNEL);
879 +       return;
880 +err:   /* allocation or fill failed: flag -ENOBUFS via nfnetlink_set_err() */
881 +       nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
882 +}
883 +
884 +static void nft_flowtable_destroy(void *ptr, void *arg)
885 +{      /* free_fn handed to rhashtable_free_and_destroy(); @arg is unused */
886 +       kfree(ptr);
887 +}
888 +
889 +static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
890 +{      /* NOTE(review): flowtable and ->ops are not freed here -- confirm owner */
891 +       cancel_delayed_work_sync(&flowtable->data.gc_work); /* stop gc first */
892 +       kfree(flowtable->name);
893 +       rhashtable_free_and_destroy(&flowtable->data.rhashtable,
894 +                                   nft_flowtable_destroy, NULL);
895 +       module_put(flowtable->data.type->owner);        /* drop type module ref */
896 +}
897 +
898  static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
899                                    u32 portid, u32 seq)
900  {
901 @@ -4883,6 +5526,49 @@ nla_put_failure:
902         return -EMSGSIZE;
903  }
904  
905 +static void nft_flowtable_event(unsigned long event, struct net_device *dev,
906 +                               struct nft_flowtable *flowtable)
907 +{      /* Unhook the first ops entry bound to @dev; @event is not examined */
908 +       int i;
909 +
910 +       for (i = 0; i < flowtable->ops_len; i++) {
911 +               if (flowtable->ops[i].dev != dev)
912 +                       continue;
913 +
914 +               nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
915 +               flowtable->ops[i].dev = NULL;   /* mark slot unbound */
916 +               break;                          /* only one entry handled */
917 +       }
918 +}
919 +
920 +static int nf_tables_flowtable_event(struct notifier_block *this,
921 +                                    unsigned long event, void *ptr)
922 +{      /* On NETDEV_UNREGISTER, unhook @dev from every flowtable in its netns */
923 +       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
924 +       struct nft_flowtable *flowtable;
925 +       struct nft_table *table;
926 +       struct nft_af_info *afi;
927 +
928 +       if (event != NETDEV_UNREGISTER)
929 +               return NOTIFY_DONE;     /* other events are ignored */
930 +
931 +       nfnl_lock(NFNL_SUBSYS_NFTABLES);        /* lists are walked under nfnl lock */
932 +       list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
933 +               list_for_each_entry(table, &afi->tables, list) {
934 +                       list_for_each_entry(flowtable, &table->flowtables, list) {
935 +                               nft_flowtable_event(event, dev, flowtable);
936 +                       }
937 +               }
938 +       }
939 +       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
940 +
941 +       return NOTIFY_DONE;
942 +}
943 +
944 +static struct notifier_block nf_tables_flowtable_notifier = {
945 +       .notifier_call  = nf_tables_flowtable_event,    /* netdevice events */
946 +};
947 +
948  static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
949                                  int event)
950  {
951 @@ -5035,6 +5721,21 @@ static const struct nfnl_callback nf_tab
952                 .attr_count     = NFTA_OBJ_MAX,
953                 .policy         = nft_obj_policy,
954         },
955 +       [NFT_MSG_NEWFLOWTABLE] = {
956 +               .call_batch     = nf_tables_newflowtable,
957 +               .attr_count     = NFTA_FLOWTABLE_MAX,
958 +               .policy         = nft_flowtable_policy,
959 +       },
960 +       [NFT_MSG_GETFLOWTABLE] = {
961 +               .call           = nf_tables_getflowtable,
962 +               .attr_count     = NFTA_FLOWTABLE_MAX,
963 +               .policy         = nft_flowtable_policy,
964 +       },
965 +       [NFT_MSG_DELFLOWTABLE] = {
966 +               .call_batch     = nf_tables_delflowtable,
967 +               .attr_count     = NFTA_FLOWTABLE_MAX,
968 +               .policy         = nft_flowtable_policy,
969 +       },
970  };
971  
972  static void nft_chain_commit_update(struct nft_trans *trans)
973 @@ -5083,6 +5784,9 @@ static void nf_tables_commit_release(str
974         case NFT_MSG_DELOBJ:
975                 nft_obj_destroy(nft_trans_obj(trans));
976                 break;
977 +       case NFT_MSG_DELFLOWTABLE:
978 +               nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
979 +               break;
980         }
981         kfree(trans);
982  }
983 @@ -5202,6 +5906,21 @@ static int nf_tables_commit(struct net *
984                         nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
985                                              NFT_MSG_DELOBJ);
986                         break;
987 +               case NFT_MSG_NEWFLOWTABLE:
988 +                       nft_clear(net, nft_trans_flowtable(trans));
989 +                       nf_tables_flowtable_notify(&trans->ctx,
990 +                                                  nft_trans_flowtable(trans),
991 +                                                  NFT_MSG_NEWFLOWTABLE);
992 +                       nft_trans_destroy(trans);
993 +                       break;
994 +               case NFT_MSG_DELFLOWTABLE:
995 +                       list_del_rcu(&nft_trans_flowtable(trans)->list);
996 +                       nf_tables_flowtable_notify(&trans->ctx,
997 +                                                  nft_trans_flowtable(trans),
998 +                                                  NFT_MSG_DELFLOWTABLE);
999 +                       nft_unregister_flowtable_net_hooks(net,
1000 +                                       nft_trans_flowtable(trans));
1001 +                       break;
1002                 }
1003         }
1004  
1005 @@ -5239,6 +5958,9 @@ static void nf_tables_abort_release(stru
1006         case NFT_MSG_NEWOBJ:
1007                 nft_obj_destroy(nft_trans_obj(trans));
1008                 break;
1009 +       case NFT_MSG_NEWFLOWTABLE:
1010 +               nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
1011 +               break;
1012         }
1013         kfree(trans);
1014  }
1015 @@ -5330,6 +6052,17 @@ static int nf_tables_abort(struct net *n
1016                         nft_clear(trans->ctx.net, nft_trans_obj(trans));
1017                         nft_trans_destroy(trans);
1018                         break;
1019 +               case NFT_MSG_NEWFLOWTABLE:
1020 +                       trans->ctx.table->use--;
1021 +                       list_del_rcu(&nft_trans_flowtable(trans)->list);
1022 +                       nft_unregister_flowtable_net_hooks(net,
1023 +                                       nft_trans_flowtable(trans));
1024 +                       break;
1025 +               case NFT_MSG_DELFLOWTABLE:
1026 +                       trans->ctx.table->use++;
1027 +                       nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
1028 +                       nft_trans_destroy(trans);
1029 +                       break;
1030                 }
1031         }
1032  
1033 @@ -5880,6 +6613,7 @@ EXPORT_SYMBOL_GPL(__nft_release_basechai
1034  /* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */
1035  static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
1036  {
1037 +       struct nft_flowtable *flowtable, *nf;
1038         struct nft_table *table, *nt;
1039         struct nft_chain *chain, *nc;
1040         struct nft_object *obj, *ne;
1041 @@ -5893,6 +6627,9 @@ static void __nft_release_afinfo(struct
1042         list_for_each_entry_safe(table, nt, &afi->tables, list) {
1043                 list_for_each_entry(chain, &table->chains, list)
1044                         nf_tables_unregister_hook(net, table, chain);
1045 +               list_for_each_entry(flowtable, &table->flowtables, list)
1046 +                       nf_unregister_net_hooks(net, flowtable->ops,
1047 +                                               flowtable->ops_len);
1048                 /* No packets are walking on these chains anymore. */
1049                 ctx.table = table;
1050                 list_for_each_entry(chain, &table->chains, list) {
1051 @@ -5903,6 +6640,11 @@ static void __nft_release_afinfo(struct
1052                                 nf_tables_rule_release(&ctx, rule);
1053                         }
1054                 }
1055 +               list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
1056 +                       list_del(&flowtable->list);
1057 +                       table->use--;
1058 +                       nf_tables_flowtable_destroy(flowtable);
1059 +               }
1060                 list_for_each_entry_safe(set, ns, &table->sets, list) {
1061                         list_del(&set->list);
1062                         table->use--;
1063 @@ -5946,6 +6688,8 @@ static int __init nf_tables_module_init(
1064         if (err < 0)
1065                 goto err3;
1066  
1067 +       register_netdevice_notifier(&nf_tables_flowtable_notifier);
1068 +
1069         pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
1070         return register_pernet_subsys(&nf_tables_net_ops);
1071  err3:
1072 @@ -5960,6 +6704,7 @@ static void __exit nf_tables_module_exit
1073  {
1074         unregister_pernet_subsys(&nf_tables_net_ops);
1075         nfnetlink_subsys_unregister(&nf_tables_subsys);
1076 +       unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
1077         rcu_barrier();
1078         nf_tables_core_module_exit();
1079         kfree(info);