1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2011 Fujitsu.  All rights reserved.
4  * Written by Miao Xie <miaox@cn.fujitsu.com>
5  */
6
7 #include <linux/slab.h>
8 #include <linux/iversion.h>
9 #include "delayed-inode.h"
10 #include "disk-io.h"
11 #include "transaction.h"
12 #include "ctree.h"
13 #include "qgroup.h"
14
15 #define BTRFS_DELAYED_WRITEBACK         512
16 #define BTRFS_DELAYED_BACKGROUND        128
17 #define BTRFS_DELAYED_BATCH             16
18
19 static struct kmem_cache *delayed_node_cache;
20
21 int __init btrfs_delayed_inode_init(void)
22 {
23         delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
24                                         sizeof(struct btrfs_delayed_node),
25                                         0,
26                                         SLAB_MEM_SPREAD,
27                                         NULL);
28         if (!delayed_node_cache)
29                 return -ENOMEM;
30         return 0;
31 }
32
33 void __cold btrfs_delayed_inode_exit(void)
34 {
35         kmem_cache_destroy(delayed_node_cache);
36 }
37
38 static inline void btrfs_init_delayed_node(
39                                 struct btrfs_delayed_node *delayed_node,
40                                 struct btrfs_root *root, u64 inode_id)
41 {
42         delayed_node->root = root;
43         delayed_node->inode_id = inode_id;
44         refcount_set(&delayed_node->refs, 0);
45         delayed_node->ins_root = RB_ROOT_CACHED;
46         delayed_node->del_root = RB_ROOT_CACHED;
47         mutex_init(&delayed_node->mutex);
48         INIT_LIST_HEAD(&delayed_node->n_list);
49         INIT_LIST_HEAD(&delayed_node->p_list);
50 }
51
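/*
 * Return 1 if item2 is the dir index item that immediately follows item1:
 * same objectid, type BTRFS_DIR_INDEX_KEY, and an offset exactly one higher.
 */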
52 static inline int btrfs_is_continuous_delayed_item(
53                                         struct btrfs_delayed_item *item1,
54                                         struct btrfs_delayed_item *item2)
55 {
56         if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
57             item1->key.objectid == item2->key.objectid &&
58             item1->key.type == item2->key.type &&
59             item1->key.offset + 1 == item2->key.offset)
60                 return 1;
61         return 0;
62 }
63
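/*
 * Look up the delayed node of an inode, first in the inode itself and then in
 * the root's radix tree.  Returns the node with an extra reference held, or
 * NULL if the inode has no delayed node.
 */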
64 static struct btrfs_delayed_node *btrfs_get_delayed_node(
65                 struct btrfs_inode *btrfs_inode)
66 {
67         struct btrfs_root *root = btrfs_inode->root;
68         u64 ino = btrfs_ino(btrfs_inode);
69         struct btrfs_delayed_node *node;
70
71         node = READ_ONCE(btrfs_inode->delayed_node);
72         if (node) {
73                 refcount_inc(&node->refs);
74                 return node;
75         }
76
77         spin_lock(&root->inode_lock);
78         node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
79
80         if (node) {
81                 if (btrfs_inode->delayed_node) {
82                         refcount_inc(&node->refs);      /* can be accessed */
83                         BUG_ON(btrfs_inode->delayed_node != node);
84                         spin_unlock(&root->inode_lock);
85                         return node;
86                 }
87
88                 /*
89                  * It's possible that we're racing into the middle of removing
90                  * this node from the radix tree.  In this case, the refcount
91                  * was zero and it should never go back to one.  Just return
92                  * NULL like it was never in the radix at all; our release
93                  * function is in the process of removing it.
94                  *
95                  * Some implementations of refcount_inc refuse to bump the
96                  * refcount once it has hit zero.  If we don't do this dance
97                  * here, refcount_inc() may decide to just WARN_ONCE() instead
98                  * of actually bumping the refcount.
99                  *
100                  * If this node is properly in the radix, we want to bump the
101                  * refcount twice, once for the inode and once for this get
102                  * operation.
103                  */
104                 if (refcount_inc_not_zero(&node->refs)) {
105                         refcount_inc(&node->refs);
106                         btrfs_inode->delayed_node = node;
107                 } else {
108                         node = NULL;
109                 }
110
111                 spin_unlock(&root->inode_lock);
112                 return node;
113         }
114         spin_unlock(&root->inode_lock);
115
116         return NULL;
117 }
118
119 /* Will return either the node or PTR_ERR(-ENOMEM) */
120 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
121                 struct btrfs_inode *btrfs_inode)
122 {
123         struct btrfs_delayed_node *node;
124         struct btrfs_root *root = btrfs_inode->root;
125         u64 ino = btrfs_ino(btrfs_inode);
126         int ret;
127
128 again:
129         node = btrfs_get_delayed_node(btrfs_inode);
130         if (node)
131                 return node;
132
133         node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
134         if (!node)
135                 return ERR_PTR(-ENOMEM);
136         btrfs_init_delayed_node(node, root, ino);
137
138         /* cached in the btrfs inode and can be accessed */
139         refcount_set(&node->refs, 2);
140
141         ret = radix_tree_preload(GFP_NOFS);
142         if (ret) {
143                 kmem_cache_free(delayed_node_cache, node);
144                 return ERR_PTR(ret);
145         }
146
147         spin_lock(&root->inode_lock);
148         ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
149         if (ret == -EEXIST) {
150                 spin_unlock(&root->inode_lock);
151                 kmem_cache_free(delayed_node_cache, node);
152                 radix_tree_preload_end();
153                 goto again;
154         }
155         btrfs_inode->delayed_node = node;
156         spin_unlock(&root->inode_lock);
157         radix_tree_preload_end();
158
159         return node;
160 }
161
162 /*
163  * Call it when holding delayed_node->mutex
164  *
165  * If mod = 1, add this node into the prepared list.
166  */
167 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
168                                      struct btrfs_delayed_node *node,
169                                      int mod)
170 {
171         spin_lock(&root->lock);
172         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
173                 if (!list_empty(&node->p_list))
174                         list_move_tail(&node->p_list, &root->prepare_list);
175                 else if (mod)
176                         list_add_tail(&node->p_list, &root->prepare_list);
177         } else {
178                 list_add_tail(&node->n_list, &root->node_list);
179                 list_add_tail(&node->p_list, &root->prepare_list);
180                 refcount_inc(&node->refs);      /* inserted into list */
181                 root->nodes++;
182                 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
183         }
184         spin_unlock(&root->lock);
185 }
186
187 /* Call it when holding delayed_node->mutex */
188 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
189                                        struct btrfs_delayed_node *node)
190 {
191         spin_lock(&root->lock);
192         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
193                 root->nodes--;
194                 refcount_dec(&node->refs);      /* not in the list */
195                 list_del_init(&node->n_list);
196                 if (!list_empty(&node->p_list))
197                         list_del_init(&node->p_list);
198                 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
199         }
200         spin_unlock(&root->lock);
201 }
202
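/* Return the first node on the delayed root's node list, referenced, or NULL. */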
203 static struct btrfs_delayed_node *btrfs_first_delayed_node(
204                         struct btrfs_delayed_root *delayed_root)
205 {
206         struct list_head *p;
207         struct btrfs_delayed_node *node = NULL;
208
209         spin_lock(&delayed_root->lock);
210         if (list_empty(&delayed_root->node_list))
211                 goto out;
212
213         p = delayed_root->node_list.next;
214         node = list_entry(p, struct btrfs_delayed_node, n_list);
215         refcount_inc(&node->refs);
216 out:
217         spin_unlock(&delayed_root->lock);
218
219         return node;
220 }
221
222 static struct btrfs_delayed_node *btrfs_next_delayed_node(
223                                                 struct btrfs_delayed_node *node)
224 {
225         struct btrfs_delayed_root *delayed_root;
226         struct list_head *p;
227         struct btrfs_delayed_node *next = NULL;
228
229         delayed_root = node->root->fs_info->delayed_root;
230         spin_lock(&delayed_root->lock);
231         if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
232                 /* not in the list */
233                 if (list_empty(&delayed_root->node_list))
234                         goto out;
235                 p = delayed_root->node_list.next;
236         } else if (list_is_last(&node->n_list, &delayed_root->node_list))
237                 goto out;
238         else
239                 p = node->n_list.next;
240
241         next = list_entry(p, struct btrfs_delayed_node, n_list);
242         refcount_inc(&next->refs);
243 out:
244         spin_unlock(&delayed_root->lock);
245
246         return next;
247 }
248
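/*
 * Drop one reference on a delayed node.  Depending on whether it still has
 * pending items, the node is (re)queued on or removed from the delayed root's
 * lists, and it is freed once the last reference goes away.
 */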
249 static void __btrfs_release_delayed_node(
250                                 struct btrfs_delayed_node *delayed_node,
251                                 int mod)
252 {
253         struct btrfs_delayed_root *delayed_root;
254
255         if (!delayed_node)
256                 return;
257
258         delayed_root = delayed_node->root->fs_info->delayed_root;
259
260         mutex_lock(&delayed_node->mutex);
261         if (delayed_node->count)
262                 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
263         else
264                 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
265         mutex_unlock(&delayed_node->mutex);
266
267         if (refcount_dec_and_test(&delayed_node->refs)) {
268                 struct btrfs_root *root = delayed_node->root;
269
270                 spin_lock(&root->inode_lock);
271                 /*
272                  * Once our refcount goes to zero, nobody is allowed to bump it
273                  * back up.  We can delete it now.
274                  */
275                 ASSERT(refcount_read(&delayed_node->refs) == 0);
276                 radix_tree_delete(&root->delayed_nodes_tree,
277                                   delayed_node->inode_id);
278                 spin_unlock(&root->inode_lock);
279                 kmem_cache_free(delayed_node_cache, delayed_node);
280         }
281 }
282
283 static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
284 {
285         __btrfs_release_delayed_node(node, 0);
286 }
287
288 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
289                                         struct btrfs_delayed_root *delayed_root)
290 {
291         struct list_head *p;
292         struct btrfs_delayed_node *node = NULL;
293
294         spin_lock(&delayed_root->lock);
295         if (list_empty(&delayed_root->prepare_list))
296                 goto out;
297
298         p = delayed_root->prepare_list.next;
299         list_del_init(p);
300         node = list_entry(p, struct btrfs_delayed_node, p_list);
301         refcount_inc(&node->refs);
302 out:
303         spin_unlock(&delayed_root->lock);
304
305         return node;
306 }
307
308 static inline void btrfs_release_prepared_delayed_node(
309                                         struct btrfs_delayed_node *node)
310 {
311         __btrfs_release_delayed_node(node, 1);
312 }
313
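/* Allocate a delayed item with room for @data_len bytes of item data. */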
314 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
315 {
316         struct btrfs_delayed_item *item;
317         item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
318         if (item) {
319                 item->data_len = data_len;
320                 item->ins_or_del = 0;
321                 item->bytes_reserved = 0;
322                 item->delayed_node = NULL;
323                 refcount_set(&item->refs, 1);
324         }
325         return item;
326 }
327
328 /*
329  * __btrfs_lookup_delayed_item - look up the delayed item by key
330  * @root:         root of the rb-tree to search (ins_root or del_root)
331  * @key:          the key to look up
332  * @prev:         used to store the prev item if the right item isn't found
333  * @next:         used to store the next item if the right item isn't found
334  *
335  * Note: if we don't find the right item, we return NULL and store the
336  * previous and next items in @prev and @next.
337  */
338 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
339                                 struct rb_root *root,
340                                 struct btrfs_key *key,
341                                 struct btrfs_delayed_item **prev,
342                                 struct btrfs_delayed_item **next)
343 {
344         struct rb_node *node, *prev_node = NULL;
345         struct btrfs_delayed_item *delayed_item = NULL;
346         int ret = 0;
347
348         node = root->rb_node;
349
350         while (node) {
351                 delayed_item = rb_entry(node, struct btrfs_delayed_item,
352                                         rb_node);
353                 prev_node = node;
354                 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
355                 if (ret < 0)
356                         node = node->rb_right;
357                 else if (ret > 0)
358                         node = node->rb_left;
359                 else
360                         return delayed_item;
361         }
362
363         if (prev) {
364                 if (!prev_node)
365                         *prev = NULL;
366                 else if (ret < 0)
367                         *prev = delayed_item;
368                 else if ((node = rb_prev(prev_node)) != NULL) {
369                         *prev = rb_entry(node, struct btrfs_delayed_item,
370                                          rb_node);
371                 } else
372                         *prev = NULL;
373         }
374
375         if (next) {
376                 if (!prev_node)
377                         *next = NULL;
378                 else if (ret > 0)
379                         *next = delayed_item;
380                 else if ((node = rb_next(prev_node)) != NULL) {
381                         *next = rb_entry(node, struct btrfs_delayed_item,
382                                          rb_node);
383                 } else
384                         *next = NULL;
385         }
386         return NULL;
387 }
388
389 static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
390                                         struct btrfs_delayed_node *delayed_node,
391                                         struct btrfs_key *key)
392 {
393         return __btrfs_lookup_delayed_item(&delayed_node->ins_root.rb_root, key,
394                                            NULL, NULL);
395 }
396
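/*
 * Insert a delayed item into the insertion or deletion rb-tree of a delayed
 * node, keyed by its btrfs key.  Returns -EEXIST if an item with the same key
 * is already queued.  Dir index insertions also advance the node's index_cnt.
 */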
397 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
398                                     struct btrfs_delayed_item *ins,
399                                     int action)
400 {
401         struct rb_node **p, *node;
402         struct rb_node *parent_node = NULL;
403         struct rb_root_cached *root;
404         struct btrfs_delayed_item *item;
405         int cmp;
406         bool leftmost = true;
407
408         if (action == BTRFS_DELAYED_INSERTION_ITEM)
409                 root = &delayed_node->ins_root;
410         else if (action == BTRFS_DELAYED_DELETION_ITEM)
411                 root = &delayed_node->del_root;
412         else
413                 BUG();
414         p = &root->rb_root.rb_node;
415         node = &ins->rb_node;
416
417         while (*p) {
418                 parent_node = *p;
419                 item = rb_entry(parent_node, struct btrfs_delayed_item,
420                                  rb_node);
421
422                 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
423                 if (cmp < 0) {
424                         p = &(*p)->rb_right;
425                         leftmost = false;
426                 } else if (cmp > 0) {
427                         p = &(*p)->rb_left;
428                 } else {
429                         return -EEXIST;
430                 }
431         }
432
433         rb_link_node(node, parent_node, p);
434         rb_insert_color_cached(node, root, leftmost);
435         ins->delayed_node = delayed_node;
436         ins->ins_or_del = action;
437
438         if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
439             action == BTRFS_DELAYED_INSERTION_ITEM &&
440             ins->key.offset >= delayed_node->index_cnt)
441                         delayed_node->index_cnt = ins->key.offset + 1;
442
443         delayed_node->count++;
444         atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
445         return 0;
446 }
447
448 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
449                                               struct btrfs_delayed_item *item)
450 {
451         return __btrfs_add_delayed_item(node, item,
452                                         BTRFS_DELAYED_INSERTION_ITEM);
453 }
454
455 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
456                                              struct btrfs_delayed_item *item)
457 {
458         return __btrfs_add_delayed_item(node, item,
459                                         BTRFS_DELAYED_DELETION_ITEM);
460 }
461
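/*
 * Account one finished delayed item and wake up waiters on the delayed root
 * when the backlog drops below the background threshold or a batch boundary
 * is reached.
 */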
462 static void finish_one_item(struct btrfs_delayed_root *delayed_root)
463 {
464         int seq = atomic_inc_return(&delayed_root->items_seq);
465
466         /* atomic_dec_return implies a barrier */
467         if ((atomic_dec_return(&delayed_root->items) <
468             BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0))
469                 cond_wake_up_nomb(&delayed_root->wait);
470 }
471
472 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
473 {
474         struct rb_root_cached *root;
475         struct btrfs_delayed_root *delayed_root;
476
477         /* Not associated with any delayed_node */
478         if (!delayed_item->delayed_node)
479                 return;
480         delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
481
482         BUG_ON(!delayed_root);
483         BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
484                delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
485
486         if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
487                 root = &delayed_item->delayed_node->ins_root;
488         else
489                 root = &delayed_item->delayed_node->del_root;
490
491         rb_erase_cached(&delayed_item->rb_node, root);
492         delayed_item->delayed_node->count--;
493
494         finish_one_item(delayed_root);
495 }
496
497 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
498 {
499         if (item) {
500                 __btrfs_remove_delayed_item(item);
501                 if (refcount_dec_and_test(&item->refs))
502                         kfree(item);
503         }
504 }
505
506 static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
507                                         struct btrfs_delayed_node *delayed_node)
508 {
509         struct rb_node *p;
510         struct btrfs_delayed_item *item = NULL;
511
512         p = rb_first_cached(&delayed_node->ins_root);
513         if (p)
514                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
515
516         return item;
517 }
518
519 static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
520                                         struct btrfs_delayed_node *delayed_node)
521 {
522         struct rb_node *p;
523         struct btrfs_delayed_item *item = NULL;
524
525         p = rb_first_cached(&delayed_node->del_root);
526         if (p)
527                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
528
529         return item;
530 }
531
532 static struct btrfs_delayed_item *__btrfs_next_delayed_item(
533                                                 struct btrfs_delayed_item *item)
534 {
535         struct rb_node *p;
536         struct btrfs_delayed_item *next = NULL;
537
538         p = rb_next(&item->rb_node);
539         if (p)
540                 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
541
542         return next;
543 }
544
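/*
 * Reserve metadata space for a delayed item by migrating it from the
 * transaction's block reserve into the delayed_block_rsv.  A no-op if the
 * transaction has no bytes reserved.
 */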
545 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
546                                                struct btrfs_root *root,
547                                                struct btrfs_delayed_item *item)
548 {
549         struct btrfs_block_rsv *src_rsv;
550         struct btrfs_block_rsv *dst_rsv;
551         struct btrfs_fs_info *fs_info = root->fs_info;
552         u64 num_bytes;
553         int ret;
554
555         if (!trans->bytes_reserved)
556                 return 0;
557
558         src_rsv = trans->block_rsv;
559         dst_rsv = &fs_info->delayed_block_rsv;
560
561         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
562
563         /*
564          * Here we migrate the space rsv from the transaction rsv, since we
565          * have already reserved space when starting the transaction.  So
566          * there is no need to reserve qgroup space here.
567          */
568         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
569         if (!ret) {
570                 trace_btrfs_space_reservation(fs_info, "delayed_item",
571                                               item->key.objectid,
572                                               num_bytes, 1);
573                 item->bytes_reserved = num_bytes;
574         }
575
576         return ret;
577 }
578
579 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
580                                                 struct btrfs_delayed_item *item)
581 {
582         struct btrfs_block_rsv *rsv;
583         struct btrfs_fs_info *fs_info = root->fs_info;
584
585         if (!item->bytes_reserved)
586                 return;
587
588         rsv = &fs_info->delayed_block_rsv;
589         /*
590          * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
591          * to release/reserve qgroup space.
592          */
593         trace_btrfs_space_reservation(fs_info, "delayed_item",
594                                       item->key.objectid, item->bytes_reserved,
595                                       0);
596         btrfs_block_rsv_release(fs_info, rsv,
597                                 item->bytes_reserved);
598 }
599
600 static int btrfs_delayed_inode_reserve_metadata(
601                                         struct btrfs_trans_handle *trans,
602                                         struct btrfs_root *root,
603                                         struct btrfs_inode *inode,
604                                         struct btrfs_delayed_node *node)
605 {
606         struct btrfs_fs_info *fs_info = root->fs_info;
607         struct btrfs_block_rsv *src_rsv;
608         struct btrfs_block_rsv *dst_rsv;
609         u64 num_bytes;
610         int ret;
611
612         src_rsv = trans->block_rsv;
613         dst_rsv = &fs_info->delayed_block_rsv;
614
615         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
616
617         /*
618          * btrfs_dirty_inode will update the inode under btrfs_join_transaction
619          * which, for speed, doesn't reserve space.  This is a problem since we
620          * still need to reserve space for this update, so try to reserve the
621          * space.
622          *
623          * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
624          * we always reserve enough to update the inode item.
625          */
626         if (!src_rsv || (!trans->bytes_reserved &&
627                          src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
628                 ret = btrfs_qgroup_reserve_meta_prealloc(root,
629                                 fs_info->nodesize, true);
630                 if (ret < 0)
631                         return ret;
632                 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
633                                           BTRFS_RESERVE_NO_FLUSH);
634                 /*
635                  * Since we're under a transaction reserve_metadata_bytes could
636                  * try to commit the transaction which will make it return
637                  * EAGAIN to make us stop the transaction we have, so return
638                  * ENOSPC instead so that btrfs_dirty_inode knows what to do.
639                  */
640                 if (ret == -EAGAIN) {
641                         ret = -ENOSPC;
642                         btrfs_qgroup_free_meta_prealloc(root, num_bytes);
643                 }
644                 if (!ret) {
645                         node->bytes_reserved = num_bytes;
646                         trace_btrfs_space_reservation(fs_info,
647                                                       "delayed_inode",
648                                                       btrfs_ino(inode),
649                                                       num_bytes, 1);
650                 } else {
651                         btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
652                 }
653                 return ret;
654         }
655
656         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
657         if (!ret) {
658                 trace_btrfs_space_reservation(fs_info, "delayed_inode",
659                                               btrfs_ino(inode), num_bytes, 1);
660                 node->bytes_reserved = num_bytes;
661         }
662
663         return ret;
664 }
665
666 static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
667                                                 struct btrfs_delayed_node *node,
668                                                 bool qgroup_free)
669 {
670         struct btrfs_block_rsv *rsv;
671
672         if (!node->bytes_reserved)
673                 return;
674
675         rsv = &fs_info->delayed_block_rsv;
676         trace_btrfs_space_reservation(fs_info, "delayed_inode",
677                                       node->inode_id, node->bytes_reserved, 0);
678         btrfs_block_rsv_release(fs_info, rsv,
679                                 node->bytes_reserved);
680         if (qgroup_free)
681                 btrfs_qgroup_free_meta_prealloc(node->root,
682                                 node->bytes_reserved);
683         else
684                 btrfs_qgroup_convert_reserved_meta(node->root,
685                                 node->bytes_reserved);
686         node->bytes_reserved = 0;
687 }
688
689 /*
690  * This helper will insert some continuous items into the same leaf according
691  * to the free space of the leaf.
692  */
693 static int btrfs_batch_insert_items(struct btrfs_root *root,
694                                     struct btrfs_path *path,
695                                     struct btrfs_delayed_item *item)
696 {
697         struct btrfs_delayed_item *curr, *next;
698         int free_space;
699         int total_data_size = 0, total_size = 0;
700         struct extent_buffer *leaf;
701         char *data_ptr;
702         struct btrfs_key *keys;
703         u32 *data_size;
704         struct list_head head;
705         int slot;
706         int nitems;
707         int i;
708         int ret = 0;
709
710         BUG_ON(!path->nodes[0]);
711
712         leaf = path->nodes[0];
713         free_space = btrfs_leaf_free_space(leaf);
714         INIT_LIST_HEAD(&head);
715
716         next = item;
717         nitems = 0;
718
719         /*
720          * Count the number of continuous items that we can insert in a batch.
721          */
722         while (total_size + next->data_len + sizeof(struct btrfs_item) <=
723                free_space) {
724                 total_data_size += next->data_len;
725                 total_size += next->data_len + sizeof(struct btrfs_item);
726                 list_add_tail(&next->tree_list, &head);
727                 nitems++;
728
729                 curr = next;
730                 next = __btrfs_next_delayed_item(curr);
731                 if (!next)
732                         break;
733
734                 if (!btrfs_is_continuous_delayed_item(curr, next))
735                         break;
736         }
737
738         if (!nitems) {
739                 ret = 0;
740                 goto out;
741         }
742
743         /*
744          * We need to allocate some memory, but that might cause the task
745          * to sleep, so we set all locked nodes in the path to blocking locks
746          * first.
747          */
748         btrfs_set_path_blocking(path);
749
750         keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
751         if (!keys) {
752                 ret = -ENOMEM;
753                 goto out;
754         }
755
756         data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
757         if (!data_size) {
758                 ret = -ENOMEM;
759                 goto error;
760         }
761
762         /* get keys of all the delayed items */
763         i = 0;
764         list_for_each_entry(next, &head, tree_list) {
765                 keys[i] = next->key;
766                 data_size[i] = next->data_len;
767                 i++;
768         }
769
770         /* insert the keys of the items */
771         setup_items_for_insert(root, path, keys, data_size,
772                                total_data_size, total_size, nitems);
773
774         /* insert the dir index items */
775         slot = path->slots[0];
776         list_for_each_entry_safe(curr, next, &head, tree_list) {
777                 data_ptr = btrfs_item_ptr(leaf, slot, char);
778                 write_extent_buffer(leaf, &curr->data,
779                                     (unsigned long)data_ptr,
780                                     curr->data_len);
781                 slot++;
782
783                 btrfs_delayed_item_release_metadata(root, curr);
784
785                 list_del(&curr->tree_list);
786                 btrfs_release_delayed_item(curr);
787         }
788
789 error:
790         kfree(data_size);
791         kfree(keys);
792 out:
793         return ret;
794 }
795
796 /*
797  * This helper only handles simple insertions that don't need to extend the
798  * item for new data, such as directory name index and inode insertions.
799  */
800 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
801                                      struct btrfs_root *root,
802                                      struct btrfs_path *path,
803                                      struct btrfs_delayed_item *delayed_item)
804 {
805         struct extent_buffer *leaf;
806         char *ptr;
807         int ret;
808
809         ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
810                                       delayed_item->data_len);
811         if (ret < 0 && ret != -EEXIST)
812                 return ret;
813
814         leaf = path->nodes[0];
815
816         ptr = btrfs_item_ptr(leaf, path->slots[0], char);
817
818         write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
819                             delayed_item->data_len);
820         btrfs_mark_buffer_dirty(leaf);
821
822         btrfs_delayed_item_release_metadata(root, delayed_item);
823         return 0;
824 }
825
826 /*
827  * We insert one item first, then if there are continuous items, we try to
828  * insert those items into the same leaf.
829  */
830 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
831                                       struct btrfs_path *path,
832                                       struct btrfs_root *root,
833                                       struct btrfs_delayed_node *node)
834 {
835         struct btrfs_delayed_item *curr, *prev;
836         int ret = 0;
837
838 do_again:
839         mutex_lock(&node->mutex);
840         curr = __btrfs_first_delayed_insertion_item(node);
841         if (!curr)
842                 goto insert_end;
843
844         ret = btrfs_insert_delayed_item(trans, root, path, curr);
845         if (ret < 0) {
846                 btrfs_release_path(path);
847                 goto insert_end;
848         }
849
850         prev = curr;
851         curr = __btrfs_next_delayed_item(prev);
852         if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
853                 /* insert the continuous items into the same leaf */
854                 path->slots[0]++;
855                 btrfs_batch_insert_items(root, path, curr);
856         }
857         btrfs_release_delayed_item(prev);
858         btrfs_mark_buffer_dirty(path->nodes[0]);
859
860         btrfs_release_path(path);
861         mutex_unlock(&node->mutex);
862         goto do_again;
863
864 insert_end:
865         mutex_unlock(&node->mutex);
866         return ret;
867 }
868
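/*
 * Delete a run of consecutive dir index items from the leaf that @path points
 * to, matching them against the queued delayed deletion items starting at
 * @item, then release those delayed items.
 */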
869 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
870                                     struct btrfs_root *root,
871                                     struct btrfs_path *path,
872                                     struct btrfs_delayed_item *item)
873 {
874         struct btrfs_delayed_item *curr, *next;
875         struct extent_buffer *leaf;
876         struct btrfs_key key;
877         struct list_head head;
878         int nitems, i, last_item;
879         int ret = 0;
880
881         BUG_ON(!path->nodes[0]);
882
883         leaf = path->nodes[0];
884
885         i = path->slots[0];
886         last_item = btrfs_header_nritems(leaf) - 1;
887         if (i > last_item)
888                 return -ENOENT; /* FIXME: Is errno suitable? */
889
890         next = item;
891         INIT_LIST_HEAD(&head);
892         btrfs_item_key_to_cpu(leaf, &key, i);
893         nitems = 0;
894         /*
895          * Count the number of dir index items that we can delete in a batch.
896          */
897         while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
898                 list_add_tail(&next->tree_list, &head);
899                 nitems++;
900
901                 curr = next;
902                 next = __btrfs_next_delayed_item(curr);
903                 if (!next)
904                         break;
905
906                 if (!btrfs_is_continuous_delayed_item(curr, next))
907                         break;
908
909                 i++;
910                 if (i > last_item)
911                         break;
912                 btrfs_item_key_to_cpu(leaf, &key, i);
913         }
914
915         if (!nitems)
916                 return 0;
917
918         ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
919         if (ret)
920                 goto out;
921
922         list_for_each_entry_safe(curr, next, &head, tree_list) {
923                 btrfs_delayed_item_release_metadata(root, curr);
924                 list_del(&curr->tree_list);
925                 btrfs_release_delayed_item(curr);
926         }
927
928 out:
929         return ret;
930 }
931
932 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
933                                       struct btrfs_path *path,
934                                       struct btrfs_root *root,
935                                       struct btrfs_delayed_node *node)
936 {
937         struct btrfs_delayed_item *curr, *prev;
938         int ret = 0;
939
940 do_again:
941         mutex_lock(&node->mutex);
942         curr = __btrfs_first_delayed_deletion_item(node);
943         if (!curr)
944                 goto delete_fail;
945
946         ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
947         if (ret < 0)
948                 goto delete_fail;
949         else if (ret > 0) {
950                 /*
951                  * can't find the item which the node points to, so this node
952                  * is invalid, just drop it.
953                  */
954                 prev = curr;
955                 curr = __btrfs_next_delayed_item(prev);
956                 btrfs_release_delayed_item(prev);
957                 ret = 0;
958                 btrfs_release_path(path);
959                 if (curr) {
960                         mutex_unlock(&node->mutex);
961                         goto do_again;
962                 } else
963                         goto delete_fail;
964         }
965
966         btrfs_batch_delete_items(trans, root, path, curr);
967         btrfs_release_path(path);
968         mutex_unlock(&node->mutex);
969         goto do_again;
970
971 delete_fail:
972         btrfs_release_path(path);
973         mutex_unlock(&node->mutex);
974         return ret;
975 }
976
977 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
978 {
979         struct btrfs_delayed_root *delayed_root;
980
981         if (delayed_node &&
982             test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
983                 BUG_ON(!delayed_node->root);
984                 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
985                 delayed_node->count--;
986
987                 delayed_root = delayed_node->root->fs_info->delayed_root;
988                 finish_one_item(delayed_root);
989         }
990 }
991
992 static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
993 {
994         struct btrfs_delayed_root *delayed_root;
995
996         ASSERT(delayed_node->root);
997         clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
998         delayed_node->count--;
999
1000         delayed_root = delayed_node->root->fs_info->delayed_root;
1001         finish_one_item(delayed_root);
1002 }
1003
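/*
 * Copy the delayed node's cached inode item into the on-disk inode item and,
 * if BTRFS_DELAYED_NODE_DEL_IREF is set, delete the inode's single
 * INODE_REF/EXTREF item.  Called with the delayed node's mutex held.
 */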
1004 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1005                                         struct btrfs_root *root,
1006                                         struct btrfs_path *path,
1007                                         struct btrfs_delayed_node *node)
1008 {
1009         struct btrfs_fs_info *fs_info = root->fs_info;
1010         struct btrfs_key key;
1011         struct btrfs_inode_item *inode_item;
1012         struct extent_buffer *leaf;
1013         int mod;
1014         int ret;
1015
1016         key.objectid = node->inode_id;
1017         key.type = BTRFS_INODE_ITEM_KEY;
1018         key.offset = 0;
1019
1020         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1021                 mod = -1;
1022         else
1023                 mod = 1;
1024
1025         ret = btrfs_lookup_inode(trans, root, path, &key, mod);
1026         if (ret > 0) {
1027                 btrfs_release_path(path);
1028                 return -ENOENT;
1029         } else if (ret < 0) {
1030                 return ret;
1031         }
1032
1033         leaf = path->nodes[0];
1034         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1035                                     struct btrfs_inode_item);
1036         write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1037                             sizeof(struct btrfs_inode_item));
1038         btrfs_mark_buffer_dirty(leaf);
1039
1040         if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1041                 goto no_iref;
1042
1043         path->slots[0]++;
1044         if (path->slots[0] >= btrfs_header_nritems(leaf))
1045                 goto search;
1046 again:
1047         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1048         if (key.objectid != node->inode_id)
1049                 goto out;
1050
1051         if (key.type != BTRFS_INODE_REF_KEY &&
1052             key.type != BTRFS_INODE_EXTREF_KEY)
1053                 goto out;
1054
1055         /*
1056          * Delayed iref deletion is for an inode that has only one link,
1057          * so there is only one iref. The case where several irefs are
1058          * in the same item doesn't exist.
1059          */
1060         btrfs_del_item(trans, root, path);
1061 out:
1062         btrfs_release_delayed_iref(node);
1063 no_iref:
1064         btrfs_release_path(path);
1065 err_out:
1066         btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
1067         btrfs_release_delayed_inode(node);
1068
1069         return ret;
1070
1071 search:
1072         btrfs_release_path(path);
1073
1074         key.type = BTRFS_INODE_EXTREF_KEY;
1075         key.offset = -1;
1076         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1077         if (ret < 0)
1078                 goto err_out;
1079         ASSERT(ret);
1080
1081         ret = 0;
1082         leaf = path->nodes[0];
1083         path->slots[0]--;
1084         goto again;
1085 }
1086
1087 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1088                                              struct btrfs_root *root,
1089                                              struct btrfs_path *path,
1090                                              struct btrfs_delayed_node *node)
1091 {
1092         int ret;
1093
1094         mutex_lock(&node->mutex);
1095         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1096                 mutex_unlock(&node->mutex);
1097                 return 0;
1098         }
1099
1100         ret = __btrfs_update_delayed_inode(trans, root, path, node);
1101         mutex_unlock(&node->mutex);
1102         return ret;
1103 }
1104
1105 static inline int
1106 __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1107                                    struct btrfs_path *path,
1108                                    struct btrfs_delayed_node *node)
1109 {
1110         int ret;
1111
1112         ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1113         if (ret)
1114                 return ret;
1115
1116         ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1117         if (ret)
1118                 return ret;
1119
1120         ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1121         return ret;
1122 }
1123
1124 /*
1125  * Called when committing the transaction.
1126  * Returns 0 on success.
1127  * Returns < 0 on error and returns with an aborted transaction with any
1128  * outstanding delayed items cleaned up.
1129  */
1130 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
1131 {
1132         struct btrfs_fs_info *fs_info = trans->fs_info;
1133         struct btrfs_delayed_root *delayed_root;
1134         struct btrfs_delayed_node *curr_node, *prev_node;
1135         struct btrfs_path *path;
1136         struct btrfs_block_rsv *block_rsv;
1137         int ret = 0;
1138         bool count = (nr > 0);
1139
1140         if (trans->aborted)
1141                 return -EIO;
1142
1143         path = btrfs_alloc_path();
1144         if (!path)
1145                 return -ENOMEM;
1146         path->leave_spinning = 1;
1147
1148         block_rsv = trans->block_rsv;
1149         trans->block_rsv = &fs_info->delayed_block_rsv;
1150
1151         delayed_root = fs_info->delayed_root;
1152
1153         curr_node = btrfs_first_delayed_node(delayed_root);
1154         while (curr_node && (!count || (count && nr--))) {
1155                 ret = __btrfs_commit_inode_delayed_items(trans, path,
1156                                                          curr_node);
1157                 if (ret) {
1158                         btrfs_release_delayed_node(curr_node);
1159                         curr_node = NULL;
1160                         btrfs_abort_transaction(trans, ret);
1161                         break;
1162                 }
1163
1164                 prev_node = curr_node;
1165                 curr_node = btrfs_next_delayed_node(curr_node);
1166                 btrfs_release_delayed_node(prev_node);
1167         }
1168
1169         if (curr_node)
1170                 btrfs_release_delayed_node(curr_node);
1171         btrfs_free_path(path);
1172         trans->block_rsv = block_rsv;
1173
1174         return ret;
1175 }
1176
1177 int btrfs_run_delayed_items(struct btrfs_trans_handle *trans)
1178 {
1179         return __btrfs_run_delayed_items(trans, -1);
1180 }
1181
1182 int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr)
1183 {
1184         return __btrfs_run_delayed_items(trans, nr);
1185 }
1186
1187 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1188                                      struct btrfs_inode *inode)
1189 {
1190         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1191         struct btrfs_path *path;
1192         struct btrfs_block_rsv *block_rsv;
1193         int ret;
1194
1195         if (!delayed_node)
1196                 return 0;
1197
1198         mutex_lock(&delayed_node->mutex);
1199         if (!delayed_node->count) {
1200                 mutex_unlock(&delayed_node->mutex);
1201                 btrfs_release_delayed_node(delayed_node);
1202                 return 0;
1203         }
1204         mutex_unlock(&delayed_node->mutex);
1205
1206         path = btrfs_alloc_path();
1207         if (!path) {
1208                 btrfs_release_delayed_node(delayed_node);
1209                 return -ENOMEM;
1210         }
1211         path->leave_spinning = 1;
1212
1213         block_rsv = trans->block_rsv;
1214         trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1215
1216         ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1217
1218         btrfs_release_delayed_node(delayed_node);
1219         btrfs_free_path(path);
1220         trans->block_rsv = block_rsv;
1221
1222         return ret;
1223 }
1224
1225 int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
1226 {
1227         struct btrfs_fs_info *fs_info = inode->root->fs_info;
1228         struct btrfs_trans_handle *trans;
1229         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1230         struct btrfs_path *path;
1231         struct btrfs_block_rsv *block_rsv;
1232         int ret;
1233
1234         if (!delayed_node)
1235                 return 0;
1236
1237         mutex_lock(&delayed_node->mutex);
1238         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1239                 mutex_unlock(&delayed_node->mutex);
1240                 btrfs_release_delayed_node(delayed_node);
1241                 return 0;
1242         }
1243         mutex_unlock(&delayed_node->mutex);
1244
1245         trans = btrfs_join_transaction(delayed_node->root);
1246         if (IS_ERR(trans)) {
1247                 ret = PTR_ERR(trans);
1248                 goto out;
1249         }
1250
1251         path = btrfs_alloc_path();
1252         if (!path) {
1253                 ret = -ENOMEM;
1254                 goto trans_out;
1255         }
1256         path->leave_spinning = 1;
1257
1258         block_rsv = trans->block_rsv;
1259         trans->block_rsv = &fs_info->delayed_block_rsv;
1260
1261         mutex_lock(&delayed_node->mutex);
1262         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1263                 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1264                                                    path, delayed_node);
1265         else
1266                 ret = 0;
1267         mutex_unlock(&delayed_node->mutex);
1268
1269         btrfs_free_path(path);
1270         trans->block_rsv = block_rsv;
1271 trans_out:
1272         btrfs_end_transaction(trans);
1273         btrfs_btree_balance_dirty(fs_info);
1274 out:
1275         btrfs_release_delayed_node(delayed_node);
1276
1277         return ret;
1278 }
1279
1280 void btrfs_remove_delayed_node(struct btrfs_inode *inode)
1281 {
1282         struct btrfs_delayed_node *delayed_node;
1283
1284         delayed_node = READ_ONCE(inode->delayed_node);
1285         if (!delayed_node)
1286                 return;
1287
1288         inode->delayed_node = NULL;
1289         btrfs_release_delayed_node(delayed_node);
1290 }
1291
1292 struct btrfs_async_delayed_work {
1293         struct btrfs_delayed_root *delayed_root;
1294         int nr;
1295         struct btrfs_work work;
1296 };
1297
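/*
 * Worker callback: flush prepared delayed nodes in the background until the
 * global item count drops below half of BTRFS_DELAYED_BACKGROUND or enough
 * nodes have been processed for this work item.
 */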
1298 static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1299 {
1300         struct btrfs_async_delayed_work *async_work;
1301         struct btrfs_delayed_root *delayed_root;
1302         struct btrfs_trans_handle *trans;
1303         struct btrfs_path *path;
1304         struct btrfs_delayed_node *delayed_node = NULL;
1305         struct btrfs_root *root;
1306         struct btrfs_block_rsv *block_rsv;
1307         int total_done = 0;
1308
1309         async_work = container_of(work, struct btrfs_async_delayed_work, work);
1310         delayed_root = async_work->delayed_root;
1311
1312         path = btrfs_alloc_path();
1313         if (!path)
1314                 goto out;
1315
1316         do {
1317                 if (atomic_read(&delayed_root->items) <
1318                     BTRFS_DELAYED_BACKGROUND / 2)
1319                         break;
1320
1321                 delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1322                 if (!delayed_node)
1323                         break;
1324
1325                 path->leave_spinning = 1;
1326                 root = delayed_node->root;
1327
1328                 trans = btrfs_join_transaction(root);
1329                 if (IS_ERR(trans)) {
1330                         btrfs_release_path(path);
1331                         btrfs_release_prepared_delayed_node(delayed_node);
1332                         total_done++;
1333                         continue;
1334                 }
1335
1336                 block_rsv = trans->block_rsv;
1337                 trans->block_rsv = &root->fs_info->delayed_block_rsv;
1338
1339                 __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1340
1341                 trans->block_rsv = block_rsv;
1342                 btrfs_end_transaction(trans);
1343                 btrfs_btree_balance_dirty_nodelay(root->fs_info);
1344
1345                 btrfs_release_path(path);
1346                 btrfs_release_prepared_delayed_node(delayed_node);
1347                 total_done++;
1348
1349         } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
1350                  || total_done < async_work->nr);
1351
1352         btrfs_free_path(path);
1353 out:
1354         wake_up(&delayed_root->wait);
1355         kfree(async_work);
1356 }
1357
1358
1359 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1360                                      struct btrfs_fs_info *fs_info, int nr)
1361 {
1362         struct btrfs_async_delayed_work *async_work;
1363
1364         async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1365         if (!async_work)
1366                 return -ENOMEM;
1367
1368         async_work->delayed_root = delayed_root;
1369         btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1370                         btrfs_async_run_delayed_root, NULL, NULL);
1371         async_work->nr = nr;
1372
1373         btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
1374         return 0;
1375 }
1376
1377 void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
1378 {
1379         WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
1380 }
1381
1382 static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1383 {
1384         int val = atomic_read(&delayed_root->items_seq);
1385
1386         if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1387                 return 1;
1388
1389         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1390                 return 1;
1391
1392         return 0;
1393 }
1394
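/*
 * Throttle delayed item producers: kick background writeback once the backlog
 * exceeds BTRFS_DELAYED_BACKGROUND, and make the caller wait for progress once
 * it grows past BTRFS_DELAYED_WRITEBACK.
 */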
1395 void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
1396 {
1397         struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
1398
1399         if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) ||
1400                 btrfs_workqueue_normal_congested(fs_info->delayed_workers))
1401                 return;
1402
1403         if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1404                 int seq;
1405                 int ret;
1406
1407                 seq = atomic_read(&delayed_root->items_seq);
1408
1409                 ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
1410                 if (ret)
1411                         return;
1412
1413                 wait_event_interruptible(delayed_root->wait,
1414                                          could_end_wait(delayed_root, seq));
1415                 return;
1416         }
1417
1418         btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
1419 }
1420
1421 /* Will return 0 or -ENOMEM */
1422 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1423                                    const char *name, int name_len,
1424                                    struct btrfs_inode *dir,
1425                                    struct btrfs_disk_key *disk_key, u8 type,
1426                                    u64 index)
1427 {
1428         struct btrfs_delayed_node *delayed_node;
1429         struct btrfs_delayed_item *delayed_item;
1430         struct btrfs_dir_item *dir_item;
1431         int ret;
1432
1433         delayed_node = btrfs_get_or_create_delayed_node(dir);
1434         if (IS_ERR(delayed_node))
1435                 return PTR_ERR(delayed_node);
1436
1437         delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1438         if (!delayed_item) {
1439                 ret = -ENOMEM;
1440                 goto release_node;
1441         }
1442
1443         delayed_item->key.objectid = btrfs_ino(dir);
1444         delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
1445         delayed_item->key.offset = index;
1446
1447         dir_item = (struct btrfs_dir_item *)delayed_item->data;
1448         dir_item->location = *disk_key;
1449         btrfs_set_stack_dir_transid(dir_item, trans->transid);
1450         btrfs_set_stack_dir_data_len(dir_item, 0);
1451         btrfs_set_stack_dir_name_len(dir_item, name_len);
1452         btrfs_set_stack_dir_type(dir_item, type);
1453         memcpy((char *)(dir_item + 1), name, name_len);
1454
1455         ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, delayed_item);
1456         /*
1457          * We reserved enough space when we started a new transaction, so
1458          * a metadata reservation failure here is impossible.
1459          */
1460         BUG_ON(ret);
1461
1462         mutex_lock(&delayed_node->mutex);
1463         ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1464         if (unlikely(ret)) {
1465                 btrfs_err(trans->fs_info,
1466                           "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1467                           name_len, name, delayed_node->root->root_key.objectid,
1468                           delayed_node->inode_id, ret);
1469                 BUG();
1470         }
1471         mutex_unlock(&delayed_node->mutex);
1472
1473 release_node:
1474         btrfs_release_delayed_node(delayed_node);
1475         return ret;
1476 }
1477
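/*
 * If a matching dir index insertion is still only queued in the delayed node,
 * drop it there instead of queueing a deletion item.  Returns 0 if it was
 * found and dropped, 1 otherwise.
 */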
1478 static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
1479                                                struct btrfs_delayed_node *node,
1480                                                struct btrfs_key *key)
1481 {
1482         struct btrfs_delayed_item *item;
1483
1484         mutex_lock(&node->mutex);
1485         item = __btrfs_lookup_delayed_insertion_item(node, key);
1486         if (!item) {
1487                 mutex_unlock(&node->mutex);
1488                 return 1;
1489         }
1490
1491         btrfs_delayed_item_release_metadata(node->root, item);
1492         btrfs_release_delayed_item(item);
1493         mutex_unlock(&node->mutex);
1494         return 0;
1495 }
1496
1497 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1498                                    struct btrfs_inode *dir, u64 index)
1499 {
1500         struct btrfs_delayed_node *node;
1501         struct btrfs_delayed_item *item;
1502         struct btrfs_key item_key;
1503         int ret;
1504
1505         node = btrfs_get_or_create_delayed_node(dir);
1506         if (IS_ERR(node))
1507                 return PTR_ERR(node);
1508
1509         item_key.objectid = btrfs_ino(dir);
1510         item_key.type = BTRFS_DIR_INDEX_KEY;
1511         item_key.offset = index;
1512
1513         ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node,
1514                                                   &item_key);
1515         if (!ret)
1516                 goto end;
1517
1518         item = btrfs_alloc_delayed_item(0);
1519         if (!item) {
1520                 ret = -ENOMEM;
1521                 goto end;
1522         }
1523
1524         item->key = item_key;
1525
1526         ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
1527         /*
1528          * We reserved enough space when we started a new transaction, so
1529          * a metadata reservation failure here is impossible.
1530          */
1531         if (ret < 0) {
1532                 btrfs_err(trans->fs_info,
1533 "metadata reservation failed for delayed dir item deletion, should have been reserved");
1534                 btrfs_release_delayed_item(item);
1535                 goto end;
1536         }
1537
1538         mutex_lock(&node->mutex);
1539         ret = __btrfs_add_delayed_deletion_item(node, item);
1540         if (unlikely(ret)) {
1541                 btrfs_err(trans->fs_info,
1542                           "error adding delayed dir index item (index: %llu) into the deletion tree of the delayed node (root id: %llu, inode id: %llu, errno: %d)",
1543                           index, node->root->root_key.objectid,
1544                           node->inode_id, ret);
1545                 btrfs_delayed_item_release_metadata(dir->root, item);
1546                 btrfs_release_delayed_item(item);
1547         }
1548         mutex_unlock(&node->mutex);
1549 end:
1550         btrfs_release_delayed_node(node);
1551         return ret;
1552 }
1553
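/*
 * Copy the cached directory index counter from the delayed node into the
 * in-memory inode.  Returns -ENOENT if the directory has no delayed node
 * and -EINVAL if the delayed node does not carry a valid index_cnt.
 */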
1554 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
1555 {
1556         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1557
1558         if (!delayed_node)
1559                 return -ENOENT;
1560
1561         /*
1562          * Since we hold the i_mutex of this directory, no new directory
1563          * index can be added to the delayed node and index_cnt cannot be
1564          * updated now, so we need not lock the delayed node.
1565          */
1566         if (!delayed_node->index_cnt) {
1567                 btrfs_release_delayed_node(delayed_node);
1568                 return -EINVAL;
1569         }
1570
1571         inode->index_cnt = delayed_node->index_cnt;
1572         btrfs_release_delayed_node(delayed_node);
1573         return 0;
1574 }
1575
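/*
 * Snapshot the pending insertions and deletions of a directory for readdir.
 * Each delayed item is pinned with an extra reference and linked onto
 * @ins_list or @del_list via item->readdir_list;
 * btrfs_readdir_put_delayed_items() drops the references again.  Only one
 * readdir can use the lists at a time, which is why the inode lock is
 * upgraded from shared to exclusive below.
 */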
1576 bool btrfs_readdir_get_delayed_items(struct inode *inode,
1577                                      struct list_head *ins_list,
1578                                      struct list_head *del_list)
1579 {
1580         struct btrfs_delayed_node *delayed_node;
1581         struct btrfs_delayed_item *item;
1582
1583         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1584         if (!delayed_node)
1585                 return false;
1586
1587         /*
1588          * We can only do one readdir with delayed items at a time because of
1589          * item->readdir_list.
1590          */
1591         inode_unlock_shared(inode);
1592         inode_lock(inode);
1593
1594         mutex_lock(&delayed_node->mutex);
1595         item = __btrfs_first_delayed_insertion_item(delayed_node);
1596         while (item) {
1597                 refcount_inc(&item->refs);
1598                 list_add_tail(&item->readdir_list, ins_list);
1599                 item = __btrfs_next_delayed_item(item);
1600         }
1601
1602         item = __btrfs_first_delayed_deletion_item(delayed_node);
1603         while (item) {
1604                 refcount_inc(&item->refs);
1605                 list_add_tail(&item->readdir_list, del_list);
1606                 item = __btrfs_next_delayed_item(item);
1607         }
1608         mutex_unlock(&delayed_node->mutex);
1609         /*
1610          * This delayed node is still cached in the btrfs inode, so refs
1611          * must be > 1 now, and we need not check whether it is going to be
1612          * freed or not.
1613          *
1614          * Besides that, this function is only used for readdir, and no
1615          * delayed items are inserted or deleted during that period, so we
1616          * also need not requeue or dequeue this delayed node.
1617          */
1618         refcount_dec(&delayed_node->refs);
1619
1620         return true;
1621 }
1622
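/*
 * Drop the item references taken by btrfs_readdir_get_delayed_items() and
 * restore the shared inode lock that the VFS expects to release.
 */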
1623 void btrfs_readdir_put_delayed_items(struct inode *inode,
1624                                      struct list_head *ins_list,
1625                                      struct list_head *del_list)
1626 {
1627         struct btrfs_delayed_item *curr, *next;
1628
1629         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1630                 list_del(&curr->readdir_list);
1631                 if (refcount_dec_and_test(&curr->refs))
1632                         kfree(curr);
1633         }
1634
1635         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1636                 list_del(&curr->readdir_list);
1637                 if (refcount_dec_and_test(&curr->refs))
1638                         kfree(curr);
1639         }
1640
1641         /*
1642          * The VFS is going to do up_read(), so we need to downgrade back to a
1643          * read lock.
1644          */
1645         downgrade_write(&inode->i_rwsem);
1646 }
1647
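/*
 * Return 1 if @index is queued for deletion on @del_list.  The list is
 * sorted by index, so the scan can stop as soon as it passes @index.
 */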
1648 int btrfs_should_delete_dir_index(struct list_head *del_list,
1649                                   u64 index)
1650 {
1651         struct btrfs_delayed_item *curr;
1652         int ret = 0;
1653
1654         list_for_each_entry(curr, del_list, readdir_list) {
1655                 if (curr->key.offset > index)
1656                         break;
1657                 if (curr->key.offset == index) {
1658                         ret = 1;
1659                         break;
1660                 }
1661         }
1662         return ret;
1663 }
1664
1665 /*
1666  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1667  * and emit it through the dir_context @ctx.
1668  */
1669 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1670                                     struct list_head *ins_list)
1671 {
1672         struct btrfs_dir_item *di;
1673         struct btrfs_delayed_item *curr, *next;
1674         struct btrfs_key location;
1675         char *name;
1676         int name_len;
1677         int over = 0;
1678         unsigned char d_type;
1679
1680         if (list_empty(ins_list))
1681                 return 0;
1682
1683         /*
1684          * The data of a delayed item never changes, so we need not lock
1685          * the items. And since we hold the i_mutex of the directory,
1686          * nobody can delete any directory index now.
1687          */
1688         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1689                 list_del(&curr->readdir_list);
1690
1691                 if (curr->key.offset < ctx->pos) {
1692                         if (refcount_dec_and_test(&curr->refs))
1693                                 kfree(curr);
1694                         continue;
1695                 }
1696
1697                 ctx->pos = curr->key.offset;
1698
1699                 di = (struct btrfs_dir_item *)curr->data;
1700                 name = (char *)(di + 1);
1701                 name_len = btrfs_stack_dir_name_len(di);
1702
1703                 d_type = fs_ftype_to_dtype(di->type);
1704                 btrfs_disk_key_to_cpu(&location, &di->location);
1705
1706                 over = !dir_emit(ctx, name, name_len,
1707                                location.objectid, d_type);
1708
1709                 if (refcount_dec_and_test(&curr->refs))
1710                         kfree(curr);
1711
1712                 if (over)
1713                         return 1;
1714                 ctx->pos++;
1715         }
1716         return 0;
1717 }
1718
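/*
 * Copy the current in-memory inode state into a stack btrfs_inode_item,
 * converting every field to the on-disk format with the stack helpers.  The
 * result is cached in the delayed node until it is flushed to the fs tree.
 */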
1719 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1720                                   struct btrfs_inode_item *inode_item,
1721                                   struct inode *inode)
1722 {
1723         btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1724         btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1725         btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1726         btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1727         btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1728         btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1729         btrfs_set_stack_inode_generation(inode_item,
1730                                          BTRFS_I(inode)->generation);
1731         btrfs_set_stack_inode_sequence(inode_item,
1732                                        inode_peek_iversion(inode));
1733         btrfs_set_stack_inode_transid(inode_item, trans->transid);
1734         btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1735         btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1736         btrfs_set_stack_inode_block_group(inode_item, 0);
1737
1738         btrfs_set_stack_timespec_sec(&inode_item->atime,
1739                                      inode->i_atime.tv_sec);
1740         btrfs_set_stack_timespec_nsec(&inode_item->atime,
1741                                       inode->i_atime.tv_nsec);
1742
1743         btrfs_set_stack_timespec_sec(&inode_item->mtime,
1744                                      inode->i_mtime.tv_sec);
1745         btrfs_set_stack_timespec_nsec(&inode_item->mtime,
1746                                       inode->i_mtime.tv_nsec);
1747
1748         btrfs_set_stack_timespec_sec(&inode_item->ctime,
1749                                      inode->i_ctime.tv_sec);
1750         btrfs_set_stack_timespec_nsec(&inode_item->ctime,
1751                                       inode->i_ctime.tv_nsec);
1752
1753         btrfs_set_stack_timespec_sec(&inode_item->otime,
1754                                      BTRFS_I(inode)->i_otime.tv_sec);
1755         btrfs_set_stack_timespec_nsec(&inode_item->otime,
1756                                      BTRFS_I(inode)->i_otime.tv_nsec);
1757 }
1758
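/*
 * The counterpart of fill_stack_inode_item(): if the inode has a delayed
 * node with a dirty inode item, that copy is newer than the one in the fs
 * tree, so read the inode fields from the delayed node instead.  Returns
 * -ENOENT when there is no such copy and the caller has to read the inode
 * item from the fs tree itself.
 */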
1759 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1760 {
1761         struct btrfs_delayed_node *delayed_node;
1762         struct btrfs_inode_item *inode_item;
1763
1764         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1765         if (!delayed_node)
1766                 return -ENOENT;
1767
1768         mutex_lock(&delayed_node->mutex);
1769         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1770                 mutex_unlock(&delayed_node->mutex);
1771                 btrfs_release_delayed_node(delayed_node);
1772                 return -ENOENT;
1773         }
1774
1775         inode_item = &delayed_node->inode_item;
1776
1777         i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1778         i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1779         btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
1780         inode->i_mode = btrfs_stack_inode_mode(inode_item);
1781         set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1782         inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1783         BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1784         BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
1785
1786         inode_set_iversion_queried(inode,
1787                                    btrfs_stack_inode_sequence(inode_item));
1788         inode->i_rdev = 0;
1789         *rdev = btrfs_stack_inode_rdev(inode_item);
1790         BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1791
1792         inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
1793         inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
1794
1795         inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
1796         inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
1797
1798         inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
1799         inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
1800
1801         BTRFS_I(inode)->i_otime.tv_sec =
1802                 btrfs_stack_timespec_sec(&inode_item->otime);
1803         BTRFS_I(inode)->i_otime.tv_nsec =
1804                 btrfs_stack_timespec_nsec(&inode_item->otime);
1805
1806         inode->i_generation = BTRFS_I(inode)->generation;
1807         BTRFS_I(inode)->index_cnt = (u64)-1;
1808
1809         mutex_unlock(&delayed_node->mutex);
1810         btrfs_release_delayed_node(delayed_node);
1811         return 0;
1812 }
1813
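/*
 * Record an inode update in the delayed node instead of writing the inode
 * item into the fs tree immediately.  If the node already carries a dirty
 * inode item it is simply refreshed; otherwise metadata space is reserved,
 * the node is marked INODE_DIRTY and one more pending item is accounted.
 */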
1814 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1815                                struct btrfs_root *root, struct inode *inode)
1816 {
1817         struct btrfs_delayed_node *delayed_node;
1818         int ret = 0;
1819
1820         delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
1821         if (IS_ERR(delayed_node))
1822                 return PTR_ERR(delayed_node);
1823
1824         mutex_lock(&delayed_node->mutex);
1825         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1826                 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1827                 goto release_node;
1828         }
1829
1830         ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
1831                                                    delayed_node);
1832         if (ret)
1833                 goto release_node;
1834
1835         fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1836         set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1837         delayed_node->count++;
1838         atomic_inc(&root->fs_info->delayed_root->items);
1839 release_node:
1840         mutex_unlock(&delayed_node->mutex);
1841         btrfs_release_delayed_node(delayed_node);
1842         return ret;
1843 }
1844
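/*
 * Queue the deletion of the inode ref of an inode whose last link is being
 * removed, so that the ref can be dropped together with the delayed inode
 * item when the delayed node is run.
 */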
1845 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
1846 {
1847         struct btrfs_fs_info *fs_info = inode->root->fs_info;
1848         struct btrfs_delayed_node *delayed_node;
1849
1850         /*
1851          * We don't do delayed inode updates during log recovery because it
1852          * leads to enospc problems.  This means we also can't do
1853          * delayed inode refs.
1854          */
1855         if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
1856                 return -EAGAIN;
1857
1858         delayed_node = btrfs_get_or_create_delayed_node(inode);
1859         if (IS_ERR(delayed_node))
1860                 return PTR_ERR(delayed_node);
1861
1862         /*
1863          * We don't reserve space for inode ref deletion because:
1864          * - We ONLY do async inode ref deletion for an inode that has only
1865          *   one link (i_nlink == 1), which means there is only one inode ref.
1866          *   In most cases the inode ref and the inode item are in the same
1867          *   leaf, and we will deal with them at the same time.
1868          *   Since we are sure we will reserve space for the inode item, it
1869          *   is unnecessary to reserve space for the inode ref deletion too.
1870          * - If the inode ref and the inode item are not in the same leaf,
1871          *   we need not worry about enospc either, because we reserve much
1872          *   more space for the inode update than it needs.
1873          * - At worst, we can steal some space from the global reservation.
1874          *   That is very rare.
1875          */
1876         mutex_lock(&delayed_node->mutex);
1877         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1878                 goto release_node;
1879
1880         set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1881         delayed_node->count++;
1882         atomic_inc(&fs_info->delayed_root->items);
1883 release_node:
1884         mutex_unlock(&delayed_node->mutex);
1885         btrfs_release_delayed_node(delayed_node);
1886         return 0;
1887 }
1888
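/*
 * Throw away everything a delayed node still holds: pending insertion and
 * deletion items, a queued inode ref deletion and a dirty inode item, and
 * give back their metadata reservations.  Used when an inode is evicted and
 * when the filesystem is torn down.
 */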
1889 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1890 {
1891         struct btrfs_root *root = delayed_node->root;
1892         struct btrfs_fs_info *fs_info = root->fs_info;
1893         struct btrfs_delayed_item *curr_item, *prev_item;
1894
1895         mutex_lock(&delayed_node->mutex);
1896         curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1897         while (curr_item) {
1898                 btrfs_delayed_item_release_metadata(root, curr_item);
1899                 prev_item = curr_item;
1900                 curr_item = __btrfs_next_delayed_item(prev_item);
1901                 btrfs_release_delayed_item(prev_item);
1902         }
1903
1904         curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1905         while (curr_item) {
1906                 btrfs_delayed_item_release_metadata(root, curr_item);
1907                 prev_item = curr_item;
1908                 curr_item = __btrfs_next_delayed_item(prev_item);
1909                 btrfs_release_delayed_item(prev_item);
1910         }
1911
1912         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1913                 btrfs_release_delayed_iref(delayed_node);
1914
1915         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1916                 btrfs_delayed_inode_release_metadata(fs_info, delayed_node, false);
1917                 btrfs_release_delayed_inode(delayed_node);
1918         }
1919         mutex_unlock(&delayed_node->mutex);
1920 }
1921
1922 void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
1923 {
1924         struct btrfs_delayed_node *delayed_node;
1925
1926         delayed_node = btrfs_get_delayed_node(inode);
1927         if (!delayed_node)
1928                 return;
1929
1930         __btrfs_kill_delayed_node(delayed_node);
1931         btrfs_release_delayed_node(delayed_node);
1932 }
1933
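/*
 * Kill every delayed node of a root.  The radix tree is walked in batches
 * of up to eight nodes; each batch is pinned under inode_lock and then
 * killed and released without the lock held.
 */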
1934 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1935 {
1936         u64 inode_id = 0;
1937         struct btrfs_delayed_node *delayed_nodes[8];
1938         int i, n;
1939
1940         while (1) {
1941                 spin_lock(&root->inode_lock);
1942                 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1943                                            (void **)delayed_nodes, inode_id,
1944                                            ARRAY_SIZE(delayed_nodes));
1945                 if (!n) {
1946                         spin_unlock(&root->inode_lock);
1947                         break;
1948                 }
1949
1950                 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1951
1952                 for (i = 0; i < n; i++)
1953                         refcount_inc(&delayed_nodes[i]->refs);
1954                 spin_unlock(&root->inode_lock);
1955
1956                 for (i = 0; i < n; i++) {
1957                         __btrfs_kill_delayed_node(delayed_nodes[i]);
1958                         btrfs_release_delayed_node(delayed_nodes[i]);
1959                 }
1960         }
1961 }
1962
1963 void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
1964 {
1965         struct btrfs_delayed_node *curr_node, *prev_node;
1966
1967         curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
1968         while (curr_node) {
1969                 __btrfs_kill_delayed_node(curr_node);
1970
1971                 prev_node = curr_node;
1972                 curr_node = btrfs_next_delayed_node(curr_node);
1973                 btrfs_release_delayed_node(prev_node);
1974         }
1975 }
1976