Linux-libre 4.9.46-gnu
[librecmc/linux-libre.git] drivers/staging/lustre/lustre/obdclass/cl_page.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * Client Lustre Page.
33  *
34  *   Author: Nikita Danilov <nikita.danilov@sun.com>
35  *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
36  */
37
38 #define DEBUG_SUBSYSTEM S_CLASS
39
40 #include "../../include/linux/libcfs/libcfs.h"
41 #include "../include/obd_class.h"
42 #include "../include/obd_support.h"
43 #include <linux/list.h>
44
45 #include "../include/cl_object.h"
46 #include "cl_internal.h"
47
48 static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);
49
50 # define PASSERT(env, page, expr)                                          \
51         do {                                                               \
52                 if (unlikely(!(expr))) {                                   \
53                         CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n"); \
54                         LASSERT(0);                                        \
55                 }                                                          \
56         } while (0)
57
58 # define PINVRNT(env, page, exp) \
59         ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
60
61 /**
62  * Internal version of cl_page_get().
63  *
64  * This function can be used to obtain an initial reference to a previously
65  * unreferenced cached object. It can be called only if concurrent page
66  * reclamation is somehow prevented, e.g., by holding a lock on the VM page
67  * associated with \a page.
68  *
69  * Use with care! Not exported.
70  */
71 static void cl_page_get_trust(struct cl_page *page)
72 {
73         LASSERT(atomic_read(&page->cp_ref) > 0);
74         atomic_inc(&page->cp_ref);
75 }
76
77 /**
78  * Returns a slice within a page, corresponding to the given layer in the
79  * device stack.
80  *
81  * \see cl_lock_at()
82  */
83 static const struct cl_page_slice *
84 cl_page_at_trusted(const struct cl_page *page,
85                    const struct lu_device_type *dtype)
86 {
87         const struct cl_page_slice *slice;
88
89         list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
90                 if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
91                         return slice;
92         }
93         return NULL;
94 }
95
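/**
 * Frees a page that has reached cl_page_state::CPS_FREEING and has no
 * remaining users: finalizes every layer slice through
 * cl_page_operations::cpo_fini(), drops the reference on the owning
 * cl_object and releases the page memory.
 */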
96 static void cl_page_free(const struct lu_env *env, struct cl_page *page)
97 {
98         struct cl_object *obj  = page->cp_obj;
99
100         PASSERT(env, page, list_empty(&page->cp_batch));
101         PASSERT(env, page, !page->cp_owner);
102         PASSERT(env, page, !page->cp_req);
103         PASSERT(env, page, page->cp_state == CPS_FREEING);
104
105         while (!list_empty(&page->cp_layers)) {
106                 struct cl_page_slice *slice;
107
108                 slice = list_entry(page->cp_layers.next,
109                                    struct cl_page_slice, cpl_linkage);
110                 list_del_init(page->cp_layers.next);
111                 if (unlikely(slice->cpl_ops->cpo_fini))
112                         slice->cpl_ops->cpo_fini(env, slice);
113         }
114         lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
115         cl_object_put(env, obj);
116         lu_ref_fini(&page->cp_reference);
117         kfree(page);
118 }
119
120 /**
121  * Helper function updating page state. This is the only place in the code
122  * where cl_page::cp_state field is mutated.
123  */
124 static inline void cl_page_state_set_trust(struct cl_page *page,
125                                            enum cl_page_state state)
126 {
127         /* bypass const. */
128         *(enum cl_page_state *)&page->cp_state = state;
129 }
130
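/**
 * Allocates a new cl_page for object \a o at index \a ind, associated with
 * the VM page \a vmpage, and initializes it by calling
 * cl_object_operations::coo_page_init() on every layer of the object stack.
 * Returns the page with a single reference held, or an ERR_PTR() value on
 * allocation or layer-initialization failure.
 */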
131 struct cl_page *cl_page_alloc(const struct lu_env *env,
132                               struct cl_object *o, pgoff_t ind,
133                               struct page *vmpage,
134                               enum cl_page_type type)
135 {
136         struct cl_page    *page;
137         struct lu_object_header *head;
138
139         page = kzalloc(cl_object_header(o)->coh_page_bufsize, GFP_NOFS);
140         if (page) {
141                 int result = 0;
142
143                 atomic_set(&page->cp_ref, 1);
144                 page->cp_obj = o;
145                 cl_object_get(o);
146                 lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
147                                      page);
148                 page->cp_vmpage = vmpage;
149                 cl_page_state_set_trust(page, CPS_CACHED);
150                 page->cp_type = type;
151                 INIT_LIST_HEAD(&page->cp_layers);
152                 INIT_LIST_HEAD(&page->cp_batch);
153                 INIT_LIST_HEAD(&page->cp_flight);
154                 lu_ref_init(&page->cp_reference);
155                 head = o->co_lu.lo_header;
156                 list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
157                         if (o->co_ops->coo_page_init) {
158                                 result = o->co_ops->coo_page_init(env, o, page,
159                                                                   ind);
160                                 if (result != 0) {
161                                         cl_page_delete0(env, page);
162                                         cl_page_free(env, page);
163                                         page = ERR_PTR(result);
164                                         break;
165                                 }
166                         }
167                 }
168         } else {
169                 page = ERR_PTR(-ENOMEM);
170         }
171         return page;
172 }
173
174 /**
175  * Returns a cl_page with index \a idx at the object \a o, and associated with
176  * the VM page \a vmpage.
177  *
178  * This is the main entry point into the cl_page caching interface. First, a
179  * cache (implemented as a per-object radix tree) is consulted. If the page
180  * is found there, it is returned immediately. Otherwise a new page is
181  * allocated and returned. In any case, an additional reference is acquired.
182  *
183  * \see cl_object_find(), cl_lock_find()
184  */
185 struct cl_page *cl_page_find(const struct lu_env *env,
186                              struct cl_object *o,
187                              pgoff_t idx, struct page *vmpage,
188                              enum cl_page_type type)
189 {
190         struct cl_page    *page = NULL;
191         struct cl_object_header *hdr;
192
193         LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
194         might_sleep();
195
196         hdr = cl_object_header(o);
197
198         CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
199                idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
200         /* fast path. */
201         if (type == CPT_CACHEABLE) {
202                 /*
203                  * vmpage lock is used to protect the child/parent
204                  * relationship
205                  */
206                 KLASSERT(PageLocked(vmpage));
207                 /*
208                  * cl_vmpage_page() can be called here without any locks as
209                  *
210                  *     - "vmpage" is locked (which prevents ->private from
211                  *       concurrent updates), and
212                  *
213                  *     - "o" cannot be destroyed while current thread holds a
214                  *       reference on it.
215                  */
216                 page = cl_vmpage_page(vmpage, o);
217
218                 if (page)
219                         return page;
220         }
221
222         /* allocate and initialize cl_page */
223         page = cl_page_alloc(env, o, idx, vmpage, type);
224         return page;
225 }
226 EXPORT_SYMBOL(cl_page_find);
227
228 static inline int cl_page_invariant(const struct cl_page *pg)
229 {
230         return cl_page_in_use_noref(pg);
231 }
232
233 static void cl_page_state_set0(const struct lu_env *env,
234                                struct cl_page *page, enum cl_page_state state)
235 {
236         enum cl_page_state old;
237
238         /*
239          * Matrix of allowed state transitions [old][new], for sanity
240          * checking.
241          */
242         static const int allowed_transitions[CPS_NR][CPS_NR] = {
243                 [CPS_CACHED] = {
244                         [CPS_CACHED]  = 0,
245                         [CPS_OWNED]   = 1, /* io finds existing cached page */
246                         [CPS_PAGEIN]  = 0,
247                         [CPS_PAGEOUT] = 1, /* write-out from the cache */
248                         [CPS_FREEING] = 1, /* eviction on the memory pressure */
249                 },
250                 [CPS_OWNED] = {
251                         [CPS_CACHED]  = 1, /* release to the cache */
252                         [CPS_OWNED]   = 0,
253                         [CPS_PAGEIN]  = 1, /* start read immediately */
254                         [CPS_PAGEOUT] = 1, /* start write immediately */
255                         [CPS_FREEING] = 1, /* lock invalidation or truncate */
256                 },
257                 [CPS_PAGEIN] = {
258                         [CPS_CACHED]  = 1, /* io completion */
259                         [CPS_OWNED]   = 0,
260                         [CPS_PAGEIN]  = 0,
261                         [CPS_PAGEOUT] = 0,
262                         [CPS_FREEING] = 0,
263                 },
264                 [CPS_PAGEOUT] = {
265                         [CPS_CACHED]  = 1, /* io completion */
266                         [CPS_OWNED]   = 0,
267                         [CPS_PAGEIN]  = 0,
268                         [CPS_PAGEOUT] = 0,
269                         [CPS_FREEING] = 0,
270                 },
271                 [CPS_FREEING] = {
272                         [CPS_CACHED]  = 0,
273                         [CPS_OWNED]   = 0,
274                         [CPS_PAGEIN]  = 0,
275                         [CPS_PAGEOUT] = 0,
276                         [CPS_FREEING] = 0,
277                 }
278         };
279
280         old = page->cp_state;
281         PASSERT(env, page, allowed_transitions[old][state]);
282         CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
283         PASSERT(env, page, page->cp_state == old);
284         PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner));
285         cl_page_state_set_trust(page, state);
286 }
287
288 static void cl_page_state_set(const struct lu_env *env,
289                               struct cl_page *page, enum cl_page_state state)
290 {
291         cl_page_state_set0(env, page, state);
292 }
293
294 /**
295  * Acquires an additional reference to a page.
296  *
297  * This can be called only by caller already possessing a reference to \a
298  * page.
299  *
300  * \see cl_object_get(), cl_lock_get().
301  */
302 void cl_page_get(struct cl_page *page)
303 {
304         cl_page_get_trust(page);
305 }
306 EXPORT_SYMBOL(cl_page_get);
307
308 /**
309  * Releases a reference to a page.
310  *
311  * When last reference is released, page is returned to the cache, unless it
312  * is in cl_page_state::CPS_FREEING state, in which case it is immediately
313  * destroyed.
314  *
315  * \see cl_object_put(), cl_lock_put().
316  */
317 void cl_page_put(const struct lu_env *env, struct cl_page *page)
318 {
319         CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
320                        atomic_read(&page->cp_ref));
321
322         if (atomic_dec_and_test(&page->cp_ref)) {
323                 LASSERT(page->cp_state == CPS_FREEING);
324
325                 LASSERT(atomic_read(&page->cp_ref) == 0);
326                 PASSERT(env, page, !page->cp_owner);
327                 PASSERT(env, page, list_empty(&page->cp_batch));
328                 /*
329                  * Page is no longer reachable by other threads. Tear
330                  * it down.
331                  */
332                 cl_page_free(env, page);
333         }
334 }
335 EXPORT_SYMBOL(cl_page_put);
336
337 /**
338  * Returns a cl_page associated with a VM page, and given cl_object.
339  */
340 struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
341 {
342         struct cl_page *page;
343
344         KLASSERT(PageLocked(vmpage));
345
346         /*
347          * NOTE: absence of races and liveness of data are guaranteed by page
348          *       lock on a "vmpage". That works because object destruction
349          *       proceeds bottom-to-top.
350          */
351
352         page = (struct cl_page *)vmpage->private;
353         if (page) {
354                 cl_page_get_trust(page);
355                 LASSERT(page->cp_type == CPT_CACHEABLE);
356         }
357         return page;
358 }
359 EXPORT_SYMBOL(cl_vmpage_page);
360
361 const struct cl_page_slice *cl_page_at(const struct cl_page *page,
362                                        const struct lu_device_type *dtype)
363 {
364         return cl_page_at_trusted(page, dtype);
365 }
366 EXPORT_SYMBOL(cl_page_at);
367
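/*
 * Helper macros iterating over the layer slices of a page and calling the
 * cl_page_operations method located at the byte offset produced by
 * CL_PAGE_OP() in every slice that defines it. CL_PAGE_INVOKE() stops at the
 * first non-zero return value and folds positive results to 0;
 * CL_PAGE_INVOID() calls void methods on all layers. The _REVERSE variants
 * walk the layers bottom-to-top.
 */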
368 #define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname)
369
370 #define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...)              \
371 ({                                                                    \
372         const struct lu_env     *__env  = (_env);                   \
373         struct cl_page       *__page = (_page);            \
374         const struct cl_page_slice *__scan;                          \
375         int                      __result;                         \
376         ptrdiff_t                  __op   = (_op);                   \
377         int                    (*__method)_proto;                   \
378                                                                         \
379         __result = 0;                                              \
380         list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
381                 __method = *(void **)((char *)__scan->cpl_ops +  __op); \
382                 if (__method) {                                         \
383                         __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
384                         if (__result != 0)                              \
385                                 break;                                  \
386                 }                                                       \
387         }                                                               \
388         if (__result > 0)                                              \
389                 __result = 0;                                      \
390         __result;                                                      \
391 })
392
393 #define CL_PAGE_INVOKE_REVERSE(_env, _page, _op, _proto, ...)           \
394 ({                                                                      \
395         const struct lu_env        *__env  = (_env);                    \
396         struct cl_page             *__page = (_page);                   \
397         const struct cl_page_slice *__scan;                             \
398         int                         __result;                           \
399         ptrdiff_t                   __op   = (_op);                     \
400         int                       (*__method)_proto;                    \
401                                                                         \
402         __result = 0;                                                   \
403         list_for_each_entry_reverse(__scan, &__page->cp_layers,         \
404                                         cpl_linkage) {                  \
405                 __method = *(void **)((char *)__scan->cpl_ops +  __op); \
406                 if (__method) {                                         \
407                         __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
408                         if (__result != 0)                              \
409                                 break;                                  \
410                 }                                                       \
411         }                                                               \
412         if (__result > 0)                                               \
413                 __result = 0;                                           \
414         __result;                                                       \
415 })
416
417 #define CL_PAGE_INVOID(_env, _page, _op, _proto, ...)              \
418 do {                                                                \
419         const struct lu_env     *__env  = (_env);                   \
420         struct cl_page       *__page = (_page);            \
421         const struct cl_page_slice *__scan;                          \
422         ptrdiff_t                  __op   = (_op);                   \
423         void                  (*__method)_proto;                    \
424                                                                         \
425         list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
426                 __method = *(void **)((char *)__scan->cpl_ops + __op);  \
427                 if (__method)                                           \
428                         (*__method)(__env, __scan, ## __VA_ARGS__);     \
429         }                                                               \
430 } while (0)
431
432 #define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...)          \
433 do {                                                                    \
434         const struct lu_env     *__env  = (_env);                       \
435         struct cl_page       *__page = (_page);                \
436         const struct cl_page_slice *__scan;                              \
437         ptrdiff_t                  __op   = (_op);                       \
438         void                  (*__method)_proto;                        \
439                                                                             \
440         list_for_each_entry_reverse(__scan, &__page->cp_layers, cpl_linkage) { \
441                 __method = *(void **)((char *)__scan->cpl_ops + __op);  \
442                 if (__method)                                           \
443                         (*__method)(__env, __scan, ## __VA_ARGS__);     \
444         }                                                               \
445 } while (0)
446
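/**
 * Invokes the cl_page_operations method at offset \a op on every layer of
 * \a page, passing \a io, after asserting that the page and the IO refer to
 * the same object.
 */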
447 static int cl_page_invoke(const struct lu_env *env,
448                           struct cl_io *io, struct cl_page *page, ptrdiff_t op)
449
450 {
451         PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
452         return CL_PAGE_INVOKE(env, page, op,
453                               (const struct lu_env *,
454                                const struct cl_page_slice *, struct cl_io *),
455                               io);
456 }
457
458 static void cl_page_invoid(const struct lu_env *env,
459                            struct cl_io *io, struct cl_page *page, ptrdiff_t op)
460
461 {
462         PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
463         CL_PAGE_INVOID(env, page, op,
464                        (const struct lu_env *,
465                         const struct cl_page_slice *, struct cl_io *), io);
466 }
467
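/**
 * Clears page ownership: decrements the owning IO's count of owned pages
 * and resets cl_page::cp_owner, if the page is currently owned.
 */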
468 static void cl_page_owner_clear(struct cl_page *page)
469 {
470         if (page->cp_owner) {
471                 LASSERT(page->cp_owner->ci_owned_nr > 0);
472                 page->cp_owner->ci_owned_nr--;
473                 page->cp_owner = NULL;
474         }
475 }
476
477 static void cl_page_owner_set(struct cl_page *page)
478 {
479         page->cp_owner->ci_owned_nr++;
480 }
481
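/**
 * Core of cl_page_disown(): clears page ownership, moves an owned page back
 * into cl_page_state::CPS_CACHED and notifies the layers through
 * cl_page_operations::cpo_disown() in bottom-to-top order.
 */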
482 void cl_page_disown0(const struct lu_env *env,
483                      struct cl_io *io, struct cl_page *pg)
484 {
485         enum cl_page_state state;
486
487         state = pg->cp_state;
488         PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
489         PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
490         cl_page_owner_clear(pg);
491
492         if (state == CPS_OWNED)
493                 cl_page_state_set(env, pg, CPS_CACHED);
494         /*
495          * Completion call-backs are executed in bottom-up order, so that the
496          * uppermost layer (llite), responsible for VFS/VM interaction, runs
497          * last and can release locks safely.
498          */
499         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown),
500                                (const struct lu_env *,
501                                 const struct cl_page_slice *, struct cl_io *),
502                                io);
503 }
504
505 /**
506  * Returns true iff the page is owned by the given io.
507  */
508 int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
509 {
510         struct cl_io *top = cl_io_top((struct cl_io *)io);
511         LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
512         return pg->cp_state == CPS_OWNED && pg->cp_owner == top;
513 }
514 EXPORT_SYMBOL(cl_page_is_owned);
515
516 /**
517  * Try to own a page by IO.
518  *
519  * Waits until the page is in cl_page_state::CPS_CACHED state, and then
520  * switches it into cl_page_state::CPS_OWNED state.
521  *
522  * \pre  !cl_page_is_owned(pg, io)
523  * \post result == 0 iff cl_page_is_owned(pg, io)
524  *
525  * \retval 0   success
526  *
527  * \retval -ve failure, e.g., the page was destroyed (and landed in
528  *           cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED),
529  *           or the page was owned by another thread, or is in IO.
530  *
531  * \see cl_page_disown()
532  * \see cl_page_operations::cpo_own()
533  * \see cl_page_own_try()
534  * \see cl_page_own
535  */
536 static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
537                         struct cl_page *pg, int nonblock)
538 {
539         int result;
540
541         PINVRNT(env, pg, !cl_page_is_owned(pg, io));
542
543         io = cl_io_top(io);
544
545         if (pg->cp_state == CPS_FREEING) {
546                 result = -ENOENT;
547         } else {
548                 result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
549                                         (const struct lu_env *,
550                                          const struct cl_page_slice *,
551                                          struct cl_io *, int),
552                                         io, nonblock);
553                 if (result == 0) {
554                         PASSERT(env, pg, !pg->cp_owner);
555                         PASSERT(env, pg, !pg->cp_req);
556                         pg->cp_owner = cl_io_top(io);
557                         cl_page_owner_set(pg);
558                         if (pg->cp_state != CPS_FREEING) {
559                                 cl_page_state_set(env, pg, CPS_OWNED);
560                         } else {
561                                 cl_page_disown0(env, io, pg);
562                                 result = -ENOENT;
563                         }
564                 }
565         }
566         PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg)));
567         return result;
568 }
569
570 /**
571  * Own a page; the caller may block.
572  *
573  * \see cl_page_own0()
574  */
575 int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
576 {
577         return cl_page_own0(env, io, pg, 0);
578 }
579 EXPORT_SYMBOL(cl_page_own);
580
581 /**
582  * Nonblock version of cl_page_own().
583  *
584  * \see cl_page_own0()
585  */
586 int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
587                     struct cl_page *pg)
588 {
589         return cl_page_own0(env, io, pg, 1);
590 }
591 EXPORT_SYMBOL(cl_page_own_try);
592
593 /**
594  * Assume page ownership.
595  *
596  * Called when page is already locked by the hosting VM.
597  *
598  * \pre !cl_page_is_owned(pg, io)
599  * \post cl_page_is_owned(pg, io)
600  *
601  * \see cl_page_operations::cpo_assume()
602  */
603 void cl_page_assume(const struct lu_env *env,
604                     struct cl_io *io, struct cl_page *pg)
605 {
606         PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
607
608         io = cl_io_top(io);
609
610         cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
611         PASSERT(env, pg, !pg->cp_owner);
612         pg->cp_owner = cl_io_top(io);
613         cl_page_owner_set(pg);
614         cl_page_state_set(env, pg, CPS_OWNED);
615 }
616 EXPORT_SYMBOL(cl_page_assume);
617
618 /**
619  * Releases page ownership without unlocking the page.
620  *
621  * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the
622  * underlying VM page (as VM is supposed to do this itself).
623  *
624  * \pre   cl_page_is_owned(pg, io)
625  * \post !cl_page_is_owned(pg, io)
626  *
627  * \see cl_page_assume()
628  */
629 void cl_page_unassume(const struct lu_env *env,
630                       struct cl_io *io, struct cl_page *pg)
631 {
632         PINVRNT(env, pg, cl_page_is_owned(pg, io));
633         PINVRNT(env, pg, cl_page_invariant(pg));
634
635         io = cl_io_top(io);
636         cl_page_owner_clear(pg);
637         cl_page_state_set(env, pg, CPS_CACHED);
638         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume),
639                                (const struct lu_env *,
640                                 const struct cl_page_slice *, struct cl_io *),
641                                io);
642 }
643 EXPORT_SYMBOL(cl_page_unassume);
644
645 /**
646  * Releases page ownership.
647  *
648  * Moves page into cl_page_state::CPS_CACHED.
649  *
650  * \pre   cl_page_is_owned(pg, io)
651  * \post !cl_page_is_owned(pg, io)
652  *
653  * \see cl_page_own()
654  * \see cl_page_operations::cpo_disown()
655  */
656 void cl_page_disown(const struct lu_env *env,
657                     struct cl_io *io, struct cl_page *pg)
658 {
659         PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
660                 pg->cp_state == CPS_FREEING);
661
662         io = cl_io_top(io);
663         cl_page_disown0(env, io, pg);
664 }
665 EXPORT_SYMBOL(cl_page_disown);
666
667 /**
668  * Called when page is to be removed from the object, e.g., as a result of
669  * truncate.
670  *
671  * Calls cl_page_operations::cpo_discard() top-to-bottom.
672  *
673  * \pre cl_page_is_owned(pg, io)
674  *
675  * \see cl_page_operations::cpo_discard()
676  */
677 void cl_page_discard(const struct lu_env *env,
678                      struct cl_io *io, struct cl_page *pg)
679 {
680         PINVRNT(env, pg, cl_page_is_owned(pg, io));
681         PINVRNT(env, pg, cl_page_invariant(pg));
682
683         cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard));
684 }
685 EXPORT_SYMBOL(cl_page_discard);
686
687 /**
688  * Version of cl_page_delete() that can be called for pages that are not
689  * fully constructed, e.g., in an error-handling
690  * cl_page_find()->cl_page_delete0() path. Doesn't check the page invariant.
691  */
692 static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
693 {
694         PASSERT(env, pg, pg->cp_state != CPS_FREEING);
695
696         /*
697          * Sever all ways to obtain new pointers to @pg.
698          */
699         cl_page_owner_clear(pg);
700
701         cl_page_state_set0(env, pg, CPS_FREEING);
702
703         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete),
704                                (const struct lu_env *,
705                                 const struct cl_page_slice *));
706 }
707
708 /**
709  * Called when a decision is made to throw page out of memory.
710  *
711  * Notifies all layers about page destruction by calling
712  * cl_page_operations::cpo_delete() method top-to-bottom.
713  *
714  * Moves page into cl_page_state::CPS_FREEING state (this is the only place
715  * where transition to this state happens).
716  *
717  * Eliminates all avenues through which new references to the page can be
718  * obtained:
719  *
720  *     - removes page from the radix trees,
721  *
722  *     - breaks linkage from VM page to cl_page.
723  *
724  * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
725  * drain after some time, at which point page will be recycled.
726  *
727  * \pre  VM page is locked
728  * \post pg->cp_state == CPS_FREEING
729  *
730  * \see cl_page_operations::cpo_delete()
731  */
732 void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
733 {
734         PINVRNT(env, pg, cl_page_invariant(pg));
735         cl_page_delete0(env, pg);
736 }
737 EXPORT_SYMBOL(cl_page_delete);
738
739 /**
740  * Marks page up-to-date.
741  *
742  * Calls cl_page_operations::cpo_export() through all layers top-to-bottom.
743  * The layer responsible for VM interaction has to mark/clear the page as
744  * up-to-date according to the \a uptodate argument.
745  *
746  * \see cl_page_operations::cpo_export()
747  */
748 void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate)
749 {
750         PINVRNT(env, pg, cl_page_invariant(pg));
751         CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export),
752                        (const struct lu_env *,
753                         const struct cl_page_slice *, int), uptodate);
754 }
755 EXPORT_SYMBOL(cl_page_export);
756
757 /**
758  * Returns true iff \a pg is VM-locked in a suitable sense by the calling
759  * thread.
760  */
761 int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
762 {
763         int result;
764         const struct cl_page_slice *slice;
765
766         slice = container_of(pg->cp_layers.next,
767                              const struct cl_page_slice, cpl_linkage);
768         PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked);
769         /*
770          * Call ->cpo_is_vmlocked() directly instead of going through
771          * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by
772          * cl_page_invariant().
773          */
774         result = slice->cpl_ops->cpo_is_vmlocked(env, slice);
775         PASSERT(env, pg, result == -EBUSY || result == -ENODATA);
776         return result == -EBUSY;
777 }
778 EXPORT_SYMBOL(cl_page_is_vmlocked);
779
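/** Page state corresponding to an in-flight transfer of type \a crt. */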
780 static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
781 {
782         return crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN;
783 }
784
785 static void cl_page_io_start(const struct lu_env *env,
786                              struct cl_page *pg, enum cl_req_type crt)
787 {
788         /*
789          * Page is queued for IO, change its state.
790          */
791         cl_page_owner_clear(pg);
792         cl_page_state_set(env, pg, cl_req_type_state(crt));
793 }
794
795 /**
796  * Prepares page for immediate transfer. cl_page_operations::cpo_prep() is
797  * called top-to-bottom. Every layer either agrees to submit this page (by
798  * returning 0), or requests to omit this page (by returning -EALREADY). The
799  * layer handling interactions with the VM also has to inform the VM that
800  * the page is now under transfer.
801  */
802 int cl_page_prep(const struct lu_env *env, struct cl_io *io,
803                  struct cl_page *pg, enum cl_req_type crt)
804 {
805         int result;
806
807         PINVRNT(env, pg, cl_page_is_owned(pg, io));
808         PINVRNT(env, pg, cl_page_invariant(pg));
809         PINVRNT(env, pg, crt < CRT_NR);
810
811         /*
812          * XXX this has to be called bottom-to-top, so that llite can set up
813          * PG_writeback without risking other layers deciding to skip this
814          * page.
815          */
816         if (crt >= CRT_NR)
817                 return -EINVAL;
818         result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
819         if (result == 0)
820                 cl_page_io_start(env, pg, crt);
821
822         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
823         return result;
824 }
825 EXPORT_SYMBOL(cl_page_prep);
826
827 /**
828  * Notify layers about transfer completion.
829  *
830  * Invoked by transfer sub-system (which is a part of osc) to notify layers
831  * that a transfer, of which this page is a part, has completed.
832  *
833  * Completion call-backs are executed in bottom-up order, so that the
834  * uppermost layer (llite), responsible for the VFS/VM interaction, runs
835  * last and can release locks safely.
836  *
837  * \pre  pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
838  * \post pg->cp_state == CPS_CACHED
839  *
840  * \see cl_page_operations::cpo_completion()
841  */
842 void cl_page_completion(const struct lu_env *env,
843                         struct cl_page *pg, enum cl_req_type crt, int ioret)
844 {
845         struct cl_sync_io *anchor = pg->cp_sync_io;
846
847         PASSERT(env, pg, crt < CRT_NR);
848         /* cl_page::cp_req already cleared by the caller (osc_completion()) */
849         PASSERT(env, pg, !pg->cp_req);
850         PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));
851
852         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
853
854         cl_page_state_set(env, pg, CPS_CACHED);
855         if (crt >= CRT_NR)
856                 return;
857         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
858                                (const struct lu_env *,
859                                 const struct cl_page_slice *, int), ioret);
860         if (anchor) {
861                 LASSERT(pg->cp_sync_io == anchor);
862                 pg->cp_sync_io = NULL;
863         }
864         /*
865          * As page->cp_obj is pinned by a reference from page->cp_req, it is
866          * safe to call cl_page_put() without risking object destruction in a
867          * non-blocking context.
868          */
869         cl_page_put(env, pg);
870
871         if (anchor)
872                 cl_sync_io_note(env, anchor, ioret);
873 }
874 EXPORT_SYMBOL(cl_page_completion);
875
876 /**
877  * Notify layers that the transfer formation engine decided to yank this
878  * page from the cache and to make it part of a transfer.
879  *
880  * \pre  pg->cp_state == CPS_CACHED
881  * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
882  *
883  * \see cl_page_operations::cpo_make_ready()
884  */
885 int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
886                        enum cl_req_type crt)
887 {
888         int result;
889
890         PINVRNT(env, pg, crt < CRT_NR);
891
892         if (crt >= CRT_NR)
893                 return -EINVAL;
894         result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
895                                 (const struct lu_env *,
896                                  const struct cl_page_slice *));
897         if (result == 0) {
898                 PASSERT(env, pg, pg->cp_state == CPS_CACHED);
899                 cl_page_io_start(env, pg, crt);
900         }
901         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
902         return result;
903 }
904 EXPORT_SYMBOL(cl_page_make_ready);
905
906 /**
907  * Called when a page is being written back at the kernel's request.
908  *
909  * \pre  cl_page_is_owned(pg, io)
910  * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
911  *
912  * \see cl_page_operations::cpo_flush()
913  */
914 int cl_page_flush(const struct lu_env *env, struct cl_io *io,
915                   struct cl_page *pg)
916 {
917         int result;
918
919         PINVRNT(env, pg, cl_page_is_owned(pg, io));
920         PINVRNT(env, pg, cl_page_invariant(pg));
921
922         result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));
923
924         CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
925         return result;
926 }
927 EXPORT_SYMBOL(cl_page_flush);
928
929 /**
930  * Checks whether the page is protected by any extent lock of at least the
931  * required mode.
932  *
933  * \return the same as in cl_page_operations::cpo_is_under_lock() method.
934  * \see cl_page_operations::cpo_is_under_lock()
935  */
936 int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
937                           struct cl_page *page, pgoff_t *max_index)
938 {
939         int rc;
940
941         PINVRNT(env, page, cl_page_invariant(page));
942
943         rc = CL_PAGE_INVOKE_REVERSE(env, page, CL_PAGE_OP(cpo_is_under_lock),
944                                     (const struct lu_env *,
945                                      const struct cl_page_slice *,
946                                       struct cl_io *, pgoff_t *),
947                                     io, max_index);
948         return rc;
949 }
950 EXPORT_SYMBOL(cl_page_is_under_lock);
951
952 /**
953  * Tells transfer engine that only part of a page is to be transmitted.
954  *
955  * \see cl_page_operations::cpo_clip()
956  */
957 void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
958                   int from, int to)
959 {
960         PINVRNT(env, pg, cl_page_invariant(pg));
961
962         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
963         CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
964                        (const struct lu_env *,
965                         const struct cl_page_slice *, int, int),
966                        from, to);
967 }
968 EXPORT_SYMBOL(cl_page_clip);
969
970 /**
971  * Prints a human-readable representation of \a pg's header via \a printer.
972  */
973 void cl_page_header_print(const struct lu_env *env, void *cookie,
974                           lu_printer_t printer, const struct cl_page *pg)
975 {
976         (*printer)(env, cookie,
977                    "page@%p[%d %p %d %d %p %p]\n",
978                    pg, atomic_read(&pg->cp_ref), pg->cp_obj,
979                    pg->cp_state, pg->cp_type,
980                    pg->cp_owner, pg->cp_req);
981 }
982 EXPORT_SYMBOL(cl_page_header_print);
983
984 /**
985  * Prints a full human-readable representation of \a pg via \a printer.
986  */
987 void cl_page_print(const struct lu_env *env, void *cookie,
988                    lu_printer_t printer, const struct cl_page *pg)
989 {
990         cl_page_header_print(env, cookie, printer, pg);
991         CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
992                        (const struct lu_env *env,
993                         const struct cl_page_slice *slice,
994                         void *cookie, lu_printer_t p), cookie, printer);
995         (*printer)(env, cookie, "end page@%p\n", pg);
996 }
997 EXPORT_SYMBOL(cl_page_print);
998
999 /**
1000  * Cancel a page which is still in a transfer.
1001  */
1002 int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
1003 {
1004         return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel),
1005                               (const struct lu_env *,
1006                                const struct cl_page_slice *));
1007 }
1008
1009 /**
1010  * Converts a page index into a byte offset within object \a obj.
1011  */
1012 loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
1013 {
1014         /*
1015          * XXX for now.
1016          */
1017         return (loff_t)idx << PAGE_SHIFT;
1018 }
1019 EXPORT_SYMBOL(cl_offset);
1020
1021 /**
1022  * Converts a byte offset within object \a obj into a page index.
1023  */
1024 pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
1025 {
1026         /*
1027          * XXX for now.
1028          */
1029         return offset >> PAGE_SHIFT;
1030 }
1031 EXPORT_SYMBOL(cl_index);
1032
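/**
 * Returns the page size used by \a obj; currently this is always PAGE_SIZE,
 * independent of the object.
 */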
1033 size_t cl_page_size(const struct cl_object *obj)
1034 {
1035         return 1UL << PAGE_SHIFT;
1036 }
1037 EXPORT_SYMBOL(cl_page_size);
1038
1039 /**
1040  * Adds page slice to the compound page.
1041  *
1042  * This is called by cl_object_operations::coo_page_init() methods to add
1043  * per-layer state to the page. The new state is added at the end of the
1044  * cl_page::cp_layers list, that is, at the bottom of the stack.
1045  *
1046  * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
1047  */
1048 void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
1049                        struct cl_object *obj, pgoff_t index,
1050                        const struct cl_page_operations *ops)
1051 {
1052         list_add_tail(&slice->cpl_linkage, &page->cp_layers);
1053         slice->cpl_obj  = obj;
1054         slice->cpl_index = index;
1055         slice->cpl_ops  = ops;
1056         slice->cpl_page = page;
1057 }
1058 EXPORT_SYMBOL(cl_page_slice_add);
1059
1060 /**
1061  * Allocate and initialize cl_cache, called by ll_init_sbi().
1062  */
1063 struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
1064 {
1065         struct cl_client_cache  *cache = NULL;
1066
1067         cache = kzalloc(sizeof(*cache), GFP_KERNEL);
1068         if (!cache)
1069                 return NULL;
1070
1071         /* Initialize cache data */
1072         atomic_set(&cache->ccc_users, 1);
1073         cache->ccc_lru_max = lru_page_max;
1074         atomic_long_set(&cache->ccc_lru_left, lru_page_max);
1075         spin_lock_init(&cache->ccc_lru_lock);
1076         INIT_LIST_HEAD(&cache->ccc_lru);
1077
1078         atomic_long_set(&cache->ccc_unstable_nr, 0);
1079         init_waitqueue_head(&cache->ccc_unstable_waitq);
1080
1081         return cache;
1082 }
1083 EXPORT_SYMBOL(cl_cache_init);
1084
1085 /**
1086  * Increase cl_cache refcount
1087  */
1088 void cl_cache_incref(struct cl_client_cache *cache)
1089 {
1090         atomic_inc(&cache->ccc_users);
1091 }
1092 EXPORT_SYMBOL(cl_cache_incref);
1093
1094 /**
1095  * Decrease the cl_cache refcount and free the cache once the refcount
1096  * drops to 0. Since llite, lov and osc all hold a cl_cache refcount,
1097  * the free will not cause a race. (LU-6173)
1098  */
1099 void cl_cache_decref(struct cl_client_cache *cache)
1100 {
1101         if (atomic_dec_and_test(&cache->ccc_users))
1102                 kfree(cache);
1103 }
1104 EXPORT_SYMBOL(cl_cache_decref);