1 diff -Nur -x .git vfs-4cbe5a5/Documentation/filesystems/Locking vfs-3d5a648/Documentation/filesystems/Locking
2 --- vfs-4cbe5a5/Documentation/filesystems/Locking 2012-09-01 19:39:58.000000000 +0200
3 +++ vfs-3d5a648/Documentation/filesystems/Locking 2012-09-05 16:35:20.000000000 +0200
5 int (*atomic_open)(struct inode *, struct dentry *,
6 struct file *, unsigned open_flag,
7 umode_t create_mode, int *opened);
8 + int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
18 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
20 diff -Nur -x .git vfs-4cbe5a5/Documentation/filesystems/overlayfs.txt vfs-3d5a648/Documentation/filesystems/overlayfs.txt
21 --- vfs-4cbe5a5/Documentation/filesystems/overlayfs.txt 1970-01-01 01:00:00.000000000 +0100
22 +++ vfs-3d5a648/Documentation/filesystems/overlayfs.txt 2012-09-05 16:35:20.000000000 +0200
24 +Written by: Neil Brown <neilb@suse.de>
29 +This document describes a prototype for a new approach to providing
30 +overlay-filesystem functionality in Linux (sometimes referred to as
31 +union-filesystems). An overlay-filesystem tries to present a
32 +filesystem which is the result of overlaying one filesystem on top
35 +The result will inevitably fail to look exactly like a normal
36 +filesystem for various technical reasons. The expectation is that
37 +many use cases will be able to ignore these differences.
39 +This approach is 'hybrid' because the objects that appear in the
40 +filesystem do not all appear to belong to that filesystem. In many
41 +cases an object accessed in the union will be indistinguishable
42 +from accessing the corresponding object from the original filesystem.
43 +This is most obvious from the 'st_dev' field returned by stat(2).
45 +While directories will report an st_dev from the overlay-filesystem,
46 +all non-directory objects will report an st_dev from the lower or
47 +upper filesystem that is providing the object. Similarly st_ino will
48 +only be unique when combined with st_dev, and both of these can change
49 +over the lifetime of a non-directory object. Many applications and
50 +tools ignore these values and will not be affected.
55 +An overlay filesystem combines two filesystems - an 'upper' filesystem
56 +and a 'lower' filesystem. When a name exists in both filesystems, the
57 +object in the 'upper' filesystem is visible while the object in the
58 +'lower' filesystem is either hidden or, in the case of directories,
59 +merged with the 'upper' object.
61 +It would be more correct to refer to an upper and lower 'directory
62 +tree' rather than 'filesystem' as it is quite possible for both
63 +directory trees to be in the same filesystem and there is no
64 +requirement that the root of a filesystem be given for either upper or
67 +The lower filesystem can be any filesystem supported by Linux and does
68 +not need to be writable. The lower filesystem can even be another
69 +overlayfs. The upper filesystem will normally be writable and if it
70 +is it must support the creation of trusted.* extended attributes, and
71 +must provide valid d_type in readdir responses, at least for symbolic
72 +links - so NFS is not suitable.
74 +A read-only overlay of two read-only filesystems may use any
80 +Overlaying mainly involves directories. If a given name appears in both
81 +upper and lower filesystems and refers to a non-directory in either,
82 +then the lower object is hidden - the name refers only to the upper
85 +Where both upper and lower objects are directories, a merged directory
88 +At mount time, the two directories given as mount options are combined
89 +into a merged directory:
91 + mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper /overlay
93 +Then whenever a lookup is requested in such a merged directory, the
94 +lookup is performed in each actual directory and the combined result
95 +is cached in the dentry belonging to the overlay filesystem. If both
96 +actual lookups find directories, both are stored and a merged
97 +directory is created, otherwise only one is stored: the upper if it
98 +exists, else the lower.
100 +Only the lists of names from directories are merged. Other content
101 +such as metadata and extended attributes are reported for the upper
102 +directory only. These attributes of the lower directory are hidden.
104 +whiteouts and opaque directories
105 +--------------------------------
107 +In order to support rm and rmdir without changing the lower
108 +filesystem, an overlay filesystem needs to record in the upper filesystem
109 +that files have been removed. This is done using whiteouts and opaque
110 +directories (non-directories are always opaque).
112 +The overlay filesystem uses extended attributes with a
113 +"trusted.overlay." prefix to record these details.
115 +A whiteout is created as a symbolic link with target
116 +"(overlay-whiteout)" and with xattr "trusted.overlay.whiteout" set to "y".
117 +When a whiteout is found in the upper level of a merged directory, any
118 +matching name in the lower level is ignored, and the whiteout itself
121 +A directory is made opaque by setting the xattr "trusted.overlay.opaque"
122 +to "y". Where the upper filesystem contains an opaque directory, any
123 +directory in the lower filesystem with the same name is ignored.
128 +When a 'readdir' request is made on a merged directory, the upper and
129 +lower directories are each read and the name lists merged in the
130 +obvious way (upper is read first, then lower - entries that already
131 +exist are not re-added). This merged name list is cached in the
132 +'struct file' and so remains as long as the file is kept open. If the
133 +directory is opened and read by two processes at the same time, they
134 +will each have separate caches. A seekdir to the start of the
135 +directory (offset 0) followed by a readdir will cause the cache to be
136 +discarded and rebuilt.
138 +This means that changes to the merged directory do not appear while a
139 +directory is being read. This is unlikely to be noticed by many
142 +Seek offsets are assigned sequentially when the directories are read.
144 + - read part of a directory
145 + - remember an offset, and close the directory
146 + - re-open the directory some time later
147 + - seek to the remembered offset
149 +there may be little correlation between the old and new locations in
150 +the list of filenames, particularly if anything has changed in the
153 +Readdir on directories that are not merged is simply handled by the
154 +underlying directory (upper or lower).
160 +Objects that are not directories (files, symlinks, device-special
161 +files etc.) are presented either from the upper or lower filesystem as
162 +appropriate. When a file in the lower filesystem is accessed in a way
163 +that requires write-access, such as opening for write access, changing
164 +some metadata etc., the file is first copied from the lower filesystem
165 +to the upper filesystem (copy_up). Note that creating a hard-link
166 +also requires copy_up, though of course creation of a symlink does
169 +The copy_up may turn out to be unnecessary, for example if the file is
170 +opened for read-write but the data is not modified.
172 +The copy_up process first makes sure that the containing directory
173 +exists in the upper filesystem - creating it and any parents as
174 +necessary. It then creates the object with the same metadata (owner,
175 +mode, mtime, symlink-target etc.) and then if the object is a file, the
176 +data is copied from the lower to the upper filesystem. Finally any
177 +extended attributes are copied up.
179 +Once the copy_up is complete, the overlay filesystem simply
180 +provides direct access to the newly created file in the upper
181 +filesystem - future operations on the file are barely noticed by the
182 +overlay filesystem (though an operation on the name of the file such as
183 +rename or unlink will of course be noticed and handled).
186 +Non-standard behavior
187 +---------------------
189 +The copy_up operation essentially creates a new, identical file and
190 +moves it over to the old name. The new file may be on a different
191 +filesystem, so both st_dev and st_ino of the file may change.
193 +Any open files referring to this inode will access the old data and
194 +metadata. Similarly any file locks obtained before copy_up will not
195 +apply to the copied up file.
197 +On a file opened with O_RDONLY, fchmod(2), fchown(2), futimesat(2) and
198 +fsetxattr(2) will fail with EROFS.
200 +If a file with multiple hard links is copied up, then this will
201 +"break" the link. Changes will not be propagated to other names
202 +referring to the same inode.
204 +Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory
205 +object in overlayfs will not contain valid absolute paths, only
206 +relative paths leading up to the filesystem's root. This will be
207 +fixed in the future.
209 +Some operations are not atomic, for example a crash during copy_up or
210 +rename will leave the filesystem in an inconsistent state. This will
211 +be addressed in the future.
213 +Changes to underlying filesystems
214 +---------------------------------
216 +Offline changes, when the overlay is not mounted, are allowed to either
217 +the upper or the lower trees.
219 +Changes to the underlying filesystems while part of a mounted overlay
220 +filesystem are not allowed. If the underlying filesystem is changed,
221 +the behavior of the overlay is undefined, though it will not result in
222 +a crash or deadlock.
223 diff -Nur -x .git vfs-4cbe5a5/Documentation/filesystems/vfs.txt vfs-3d5a648/Documentation/filesystems/vfs.txt
224 --- vfs-4cbe5a5/Documentation/filesystems/vfs.txt 2012-09-01 19:39:58.000000000 +0200
225 +++ vfs-3d5a648/Documentation/filesystems/vfs.txt 2012-09-05 16:35:20.000000000 +0200
227 int (*atomic_open)(struct inode *, struct dentry *,
228 struct file *, unsigned open_flag,
229 umode_t create_mode, int *opened);
230 + int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
233 Again, all methods are called without any locks being held, unless
235 but instead uses bmap to find out where the blocks in the file
236 are and uses those addresses directly.
238 + dentry_open: this is an alternative to f_op->open(), the difference is that
239 + this method may open a file not necessarily originating from the same
240 + filesystem as the one i_op->dentry_open() was called on. It may be
241 + useful for stacking filesystems which want to allow native I/O directly
242 + on underlying files.
245 invalidatepage: If a page has PagePrivate set, then invalidatepage
246 will be called when part or all of the page is to be removed
247 diff -Nur -x .git vfs-4cbe5a5/fs/ecryptfs/main.c vfs-3d5a648/fs/ecryptfs/main.c
248 --- vfs-4cbe5a5/fs/ecryptfs/main.c 2012-09-01 19:39:58.000000000 +0200
249 +++ vfs-3d5a648/fs/ecryptfs/main.c 2012-09-05 16:35:20.000000000 +0200
251 s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
252 s->s_blocksize = path.dentry->d_sb->s_blocksize;
253 s->s_magic = ECRYPTFS_SUPER_MAGIC;
254 + s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1;
257 + if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
258 + printk(KERN_ERR "eCryptfs: maximum fs stacking depth exceeded\n");
262 inode = ecryptfs_get_inode(path.dentry->d_inode, s);
264 diff -Nur -x .git vfs-4cbe5a5/fs/internal.h vfs-3d5a648/fs/internal.h
265 --- vfs-4cbe5a5/fs/internal.h 2012-09-01 19:39:58.000000000 +0200
266 +++ vfs-3d5a648/fs/internal.h 2012-09-05 16:35:20.000000000 +0200
268 extern void __init chrdev_init(void);
273 -extern int __inode_permission(struct inode *, int);
278 extern int copy_mount_options(const void __user *, unsigned long *);
279 diff -Nur -x .git vfs-4cbe5a5/fs/Kconfig vfs-3d5a648/fs/Kconfig
280 --- vfs-4cbe5a5/fs/Kconfig 2012-09-01 19:39:58.000000000 +0200
281 +++ vfs-3d5a648/fs/Kconfig 2012-09-05 16:35:20.000000000 +0200
284 source "fs/autofs4/Kconfig"
285 source "fs/fuse/Kconfig"
286 +source "fs/overlayfs/Kconfig"
289 tristate "Character device in Userspace support"
290 diff -Nur -x .git vfs-4cbe5a5/fs/Makefile vfs-3d5a648/fs/Makefile
291 --- vfs-4cbe5a5/fs/Makefile 2012-09-01 19:39:58.000000000 +0200
292 +++ vfs-3d5a648/fs/Makefile 2012-09-05 16:35:20.000000000 +0200
294 obj-$(CONFIG_AUTOFS4_FS) += autofs4/
295 obj-$(CONFIG_ADFS_FS) += adfs/
296 obj-$(CONFIG_FUSE_FS) += fuse/
297 +obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/
298 obj-$(CONFIG_UDF_FS) += udf/
299 obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
300 obj-$(CONFIG_OMFS_FS) += omfs/
301 diff -Nur -x .git vfs-4cbe5a5/fs/namei.c vfs-3d5a648/fs/namei.c
302 --- vfs-4cbe5a5/fs/namei.c 2012-09-01 19:39:58.000000000 +0200
303 +++ vfs-3d5a648/fs/namei.c 2012-09-05 16:35:20.000000000 +0200
306 return security_inode_permission(inode, mask);
308 +EXPORT_SYMBOL(__inode_permission);
311 * sb_permission - Check superblock-level permissions
312 @@ -2822,9 +2823,12 @@
313 error = may_open(&nd->path, acc_mode, open_flag);
316 - file->f_path.mnt = nd->path.mnt;
317 - error = finish_open(file, nd->path.dentry, NULL, opened);
320 + BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
321 + error = vfs_open(&nd->path, file, current_cred());
323 + *opened |= FILE_OPENED;
325 if (error == -EOPENSTALE)
328 diff -Nur -x .git vfs-4cbe5a5/fs/namespace.c vfs-3d5a648/fs/namespace.c
329 --- vfs-4cbe5a5/fs/namespace.c 2012-09-01 19:39:58.000000000 +0200
330 +++ vfs-3d5a648/fs/namespace.c 2012-09-05 16:35:20.000000000 +0200
331 @@ -1387,6 +1387,24 @@
332 release_mounts(&umount_list);
335 +struct vfsmount *clone_private_mount(struct path *path)
337 + struct mount *old_mnt = real_mount(path->mnt);
338 + struct mount *new_mnt;
340 + if (IS_MNT_UNBINDABLE(old_mnt))
341 + return ERR_PTR(-EINVAL);
343 + down_read(&namespace_sem);
344 + new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
345 + up_read(&namespace_sem);
347 + return ERR_PTR(-ENOMEM);
349 + return &new_mnt->mnt;
351 +EXPORT_SYMBOL_GPL(clone_private_mount);
353 int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
354 struct vfsmount *root)
356 diff -Nur -x .git vfs-4cbe5a5/fs/open.c vfs-3d5a648/fs/open.c
357 --- vfs-4cbe5a5/fs/open.c 2012-09-01 19:39:58.000000000 +0200
358 +++ vfs-3d5a648/fs/open.c 2012-09-05 16:35:20.000000000 +0200
360 return ERR_PTR(error);
364 - error = do_dentry_open(f, NULL, cred);
365 + error = vfs_open(path, f, cred);
367 error = open_check_o_direct(f);
371 EXPORT_SYMBOL(dentry_open);
374 + * vfs_open - open the file at the given path
375 + * @path: path to open
376 + * @filp: newly allocated file with f_flags initialized
377 + * @cred: credentials to use
379 +int vfs_open(const struct path *path, struct file *filp,
380 + const struct cred *cred)
382 + struct inode *inode = path->dentry->d_inode;
384 + if (inode->i_op->dentry_open)
385 + return inode->i_op->dentry_open(path->dentry, filp, cred);
387 + filp->f_path = *path;
388 + return do_dentry_open(filp, NULL, cred);
391 +EXPORT_SYMBOL(vfs_open);
393 static void __put_unused_fd(struct files_struct *files, unsigned int fd)
395 struct fdtable *fdt = files_fdtable(files);
396 diff -Nur -x .git vfs-4cbe5a5/fs/overlayfs/copy_up.c vfs-3d5a648/fs/overlayfs/copy_up.c
397 --- vfs-4cbe5a5/fs/overlayfs/copy_up.c 1970-01-01 01:00:00.000000000 +0100
398 +++ vfs-3d5a648/fs/overlayfs/copy_up.c 2012-09-05 16:35:20.000000000 +0200
402 + * Copyright (C) 2011 Novell Inc.
404 + * This program is free software; you can redistribute it and/or modify it
405 + * under the terms of the GNU General Public License version 2 as published by
406 + * the Free Software Foundation.
409 +#include <linux/fs.h>
410 +#include <linux/slab.h>
411 +#include <linux/file.h>
412 +#include <linux/splice.h>
413 +#include <linux/xattr.h>
414 +#include <linux/security.h>
415 +#include <linux/uaccess.h>
416 +#include <linux/sched.h>
417 +#include "overlayfs.h"
419 +#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
421 +static int ovl_copy_up_xattr(struct dentry *old, struct dentry *new)
423 + ssize_t list_size, size;
424 + char *buf, *name, *value;
427 + if (!old->d_inode->i_op->getxattr ||
428 + !new->d_inode->i_op->getxattr)
431 + list_size = vfs_listxattr(old, NULL, 0);
432 + if (list_size <= 0) {
433 + if (list_size == -EOPNOTSUPP)
438 + buf = kzalloc(list_size, GFP_KERNEL);
443 + value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
447 + list_size = vfs_listxattr(old, buf, list_size);
448 + if (list_size <= 0) {
450 + goto out_free_value;
453 + for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
454 + size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
457 + goto out_free_value;
459 + error = vfs_setxattr(new, name, value, size, 0);
461 + goto out_free_value;
471 +static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
473 + struct file *old_file;
474 + struct file *new_file;
480 + old_file = ovl_path_open(old, O_RDONLY);
481 + if (IS_ERR(old_file))
482 + return PTR_ERR(old_file);
484 + new_file = ovl_path_open(new, O_WRONLY);
485 + if (IS_ERR(new_file)) {
486 + error = PTR_ERR(new_file);
490 + /* FIXME: copy up sparse files efficiently */
492 + loff_t offset = new_file->f_pos;
493 + size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
496 + if (len < this_len)
499 + if (signal_pending_state(TASK_KILLABLE, current)) {
504 + bytes = do_splice_direct(old_file, &offset, new_file, this_len,
520 +static char *ovl_read_symlink(struct dentry *realdentry)
524 + struct inode *inode = realdentry->d_inode;
525 + mm_segment_t old_fs;
528 + if (!inode->i_op->readlink)
532 + buf = (char *) __get_free_page(GFP_KERNEL);
538 + /* The cast to a user pointer is valid due to the set_fs() */
539 + res = inode->i_op->readlink(realdentry,
540 + (char __user *)buf, PAGE_SIZE - 1);
543 + free_page((unsigned long) buf);
551 + return ERR_PTR(res);
554 +static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
556 + struct iattr attr = {
558 + ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
559 + .ia_atime = stat->atime,
560 + .ia_mtime = stat->mtime,
563 + return notify_change(upperdentry, &attr);
566 +static int ovl_set_mode(struct dentry *upperdentry, umode_t mode)
568 + struct iattr attr = {
569 + .ia_valid = ATTR_MODE,
573 + return notify_change(upperdentry, &attr);
576 +static int ovl_copy_up_locked(struct dentry *upperdir, struct dentry *dentry,
577 + struct path *lowerpath, struct kstat *stat,
581 + struct path newpath;
582 + umode_t mode = stat->mode;
584 + /* Can't properly set mode on creation because of the umask */
585 + stat->mode &= S_IFMT;
587 + ovl_path_upper(dentry, &newpath);
588 + WARN_ON(newpath.dentry);
589 + newpath.dentry = ovl_upper_create(upperdir, dentry, stat, link);
590 + if (IS_ERR(newpath.dentry))
591 + return PTR_ERR(newpath.dentry);
593 + if (S_ISREG(stat->mode)) {
594 + err = ovl_copy_up_data(lowerpath, &newpath, stat->size);
599 + err = ovl_copy_up_xattr(lowerpath->dentry, newpath.dentry);
603 + mutex_lock(&newpath.dentry->d_inode->i_mutex);
604 + if (!S_ISLNK(stat->mode))
605 + err = ovl_set_mode(newpath.dentry, mode);
607 + err = ovl_set_timestamps(newpath.dentry, stat);
608 + mutex_unlock(&newpath.dentry->d_inode->i_mutex);
612 + ovl_dentry_update(dentry, newpath.dentry);
615 + * Easiest way to get rid of the lower dentry reference is to
616 + * drop this dentry. This is neither needed nor possible for
619 + if (!S_ISDIR(stat->mode))
625 + if (S_ISDIR(stat->mode))
626 + vfs_rmdir(upperdir->d_inode, newpath.dentry);
628 + vfs_unlink(upperdir->d_inode, newpath.dentry);
630 + dput(newpath.dentry);
636 + * Copy up a single dentry
638 + * Directory renames only allowed on "pure upper" (already created on
639 + * upper filesystem, never copied up). Directories which are on lower or
640 + * are merged may not be renamed. For these -EXDEV is returned and
641 + * userspace has to deal with it. This means, when copying up a
642 + * directory we can rely on it and ancestors being stable.
644 + * Non-directory renames start with copy up of source if necessary. The
645 + * actual rename will only proceed once the copy up was successful. Copy
646 + * up uses upper parent i_mutex for exclusion. Since rename can change
647 + * d_parent it is possible that the copy up will lock the old parent. At
648 + * that point the file will have already been copied up anyway.
650 +static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
651 + struct path *lowerpath, struct kstat *stat)
654 + struct kstat pstat;
655 + struct path parentpath;
656 + struct dentry *upperdir;
657 + const struct cred *old_cred;
658 + struct cred *override_cred;
661 + ovl_path_upper(parent, &parentpath);
662 + upperdir = parentpath.dentry;
664 + err = vfs_getattr(parentpath.mnt, parentpath.dentry, &pstat);
668 + if (S_ISLNK(stat->mode)) {
669 + link = ovl_read_symlink(lowerpath->dentry);
671 + return PTR_ERR(link);
675 + override_cred = prepare_creds();
676 + if (!override_cred)
677 + goto out_free_link;
679 + override_cred->fsuid = stat->uid;
680 + override_cred->fsgid = stat->gid;
682 + * CAP_SYS_ADMIN for copying up extended attributes
683 + * CAP_DAC_OVERRIDE for create
684 + * CAP_FOWNER for chmod, timestamp update
685 + * CAP_FSETID for chmod
686 + * CAP_MKNOD for mknod
688 + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
689 + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
690 + cap_raise(override_cred->cap_effective, CAP_FOWNER);
691 + cap_raise(override_cred->cap_effective, CAP_FSETID);
692 + cap_raise(override_cred->cap_effective, CAP_MKNOD);
693 + old_cred = override_creds(override_cred);
695 + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
696 + if (ovl_path_type(dentry) != OVL_PATH_LOWER) {
699 + err = ovl_copy_up_locked(upperdir, dentry, lowerpath,
702 + /* Restore timestamps on parent (best effort) */
703 + ovl_set_timestamps(upperdir, &pstat);
707 + mutex_unlock(&upperdir->d_inode->i_mutex);
709 + revert_creds(old_cred);
710 + put_cred(override_cred);
714 + free_page((unsigned long) link);
719 +int ovl_copy_up(struct dentry *dentry)
725 + struct dentry *next;
726 + struct dentry *parent;
727 + struct path lowerpath;
729 + enum ovl_path_type type = ovl_path_type(dentry);
731 + if (type != OVL_PATH_LOWER)
734 + next = dget(dentry);
735 + /* find the topmost dentry not yet copied up */
737 + parent = dget_parent(next);
739 + type = ovl_path_type(parent);
740 + if (type != OVL_PATH_LOWER)
747 + ovl_path_lower(next, &lowerpath);
748 + err = vfs_getattr(lowerpath.mnt, lowerpath.dentry, &stat);
750 + err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
759 +/* Optimize by not copying up the file first and truncating later */
760 +int ovl_copy_up_truncate(struct dentry *dentry, loff_t size)
764 + struct path lowerpath;
765 + struct dentry *parent = dget_parent(dentry);
767 + err = ovl_copy_up(parent);
769 + goto out_dput_parent;
771 + ovl_path_lower(dentry, &lowerpath);
772 + err = vfs_getattr(lowerpath.mnt, lowerpath.dentry, &stat);
774 + goto out_dput_parent;
776 + if (size < stat.size)
779 + err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
785 diff -Nur -x .git vfs-4cbe5a5/fs/overlayfs/dir.c vfs-3d5a648/fs/overlayfs/dir.c
786 --- vfs-4cbe5a5/fs/overlayfs/dir.c 1970-01-01 01:00:00.000000000 +0100
787 +++ vfs-3d5a648/fs/overlayfs/dir.c 2012-09-05 16:35:20.000000000 +0200
791 + * Copyright (C) 2011 Novell Inc.
793 + * This program is free software; you can redistribute it and/or modify it
794 + * under the terms of the GNU General Public License version 2 as published by
795 + * the Free Software Foundation.
798 +#include <linux/fs.h>
799 +#include <linux/namei.h>
800 +#include <linux/xattr.h>
801 +#include <linux/security.h>
802 +#include <linux/cred.h>
803 +#include "overlayfs.h"
805 +static const char *ovl_whiteout_symlink = "(overlay-whiteout)";
807 +static int ovl_whiteout(struct dentry *upperdir, struct dentry *dentry)
810 + struct dentry *newdentry;
811 + const struct cred *old_cred;
812 + struct cred *override_cred;
814 + /* FIXME: recheck lower dentry to see if whiteout is really needed */
817 + override_cred = prepare_creds();
818 + if (!override_cred)
822 + * CAP_SYS_ADMIN for setxattr
823 + * CAP_DAC_OVERRIDE for symlink creation
824 + * CAP_FOWNER for unlink in sticky directory
826 + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
827 + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
828 + cap_raise(override_cred->cap_effective, CAP_FOWNER);
829 + override_cred->fsuid = GLOBAL_ROOT_UID;
830 + override_cred->fsgid = GLOBAL_ROOT_GID;
831 + old_cred = override_creds(override_cred);
833 + newdentry = lookup_one_len(dentry->d_name.name, upperdir,
834 + dentry->d_name.len);
835 + err = PTR_ERR(newdentry);
836 + if (IS_ERR(newdentry))
839 + /* Just been removed within the same locked region */
840 + WARN_ON(newdentry->d_inode);
842 + err = vfs_symlink(upperdir->d_inode, newdentry, ovl_whiteout_symlink);
846 + ovl_dentry_version_inc(dentry->d_parent);
848 + err = vfs_setxattr(newdentry, ovl_whiteout_xattr, "y", 1, 0);
850 + vfs_unlink(upperdir->d_inode, newdentry);
855 + revert_creds(old_cred);
856 + put_cred(override_cred);
860 + * There's no way to recover from failure to whiteout.
861 + * What should we do? Log a big fat error and... ?
863 + printk(KERN_ERR "overlayfs: ERROR - failed to whiteout '%s'\n",
864 + dentry->d_name.name);
870 +static struct dentry *ovl_lookup_create(struct dentry *upperdir,
871 + struct dentry *template)
874 + struct dentry *newdentry;
875 + struct qstr *name = &template->d_name;
877 + newdentry = lookup_one_len(name->name, upperdir, name->len);
878 + if (IS_ERR(newdentry))
881 + if (newdentry->d_inode) {
882 + const struct cred *old_cred;
883 + struct cred *override_cred;
885 + /* No need to check whiteout if lower parent is non-existent */
887 + if (!ovl_dentry_lower(template->d_parent))
890 + if (!S_ISLNK(newdentry->d_inode->i_mode))
894 + override_cred = prepare_creds();
895 + if (!override_cred)
899 + * CAP_SYS_ADMIN for getxattr
900 + * CAP_FOWNER for unlink in sticky directory
902 + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
903 + cap_raise(override_cred->cap_effective, CAP_FOWNER);
904 + old_cred = override_creds(override_cred);
907 + if (ovl_is_whiteout(newdentry))
908 + err = vfs_unlink(upperdir->d_inode, newdentry);
910 + revert_creds(old_cred);
911 + put_cred(override_cred);
916 + newdentry = lookup_one_len(name->name, upperdir, name->len);
917 + if (IS_ERR(newdentry)) {
918 + ovl_whiteout(upperdir, template);
923 + * Whiteout has just been successfully removed, parent
924 + * i_mutex is still held, there's no way the lookup
925 + * could return positive.
927 + WARN_ON(newdentry->d_inode);
934 + return ERR_PTR(err);
937 +struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
938 + struct kstat *stat, const char *link)
941 + struct dentry *newdentry;
942 + struct inode *dir = upperdir->d_inode;
944 + newdentry = ovl_lookup_create(upperdir, dentry);
945 + if (IS_ERR(newdentry))
948 + switch (stat->mode & S_IFMT) {
950 + err = vfs_create(dir, newdentry, stat->mode, NULL);
954 + err = vfs_mkdir(dir, newdentry, stat->mode);
961 + err = vfs_mknod(dir, newdentry, stat->mode, stat->rdev);
965 + err = vfs_symlink(dir, newdentry, link);
972 + if (ovl_dentry_is_opaque(dentry))
973 + ovl_whiteout(upperdir, dentry);
975 + newdentry = ERR_PTR(err);
976 + } else if (WARN_ON(!newdentry->d_inode)) {
978 + * Not quite sure if non-instantiated dentry is legal or not.
979 + * VFS doesn't seem to care so check and warn here.
982 + newdentry = ERR_PTR(-ENOENT);
990 +static int ovl_set_opaque(struct dentry *upperdentry)
993 + const struct cred *old_cred;
994 + struct cred *override_cred;
996 + override_cred = prepare_creds();
997 + if (!override_cred)
1000 + /* CAP_SYS_ADMIN for setxattr of "trusted" namespace */
1001 + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
1002 + old_cred = override_creds(override_cred);
1003 + err = vfs_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0);
1004 + revert_creds(old_cred);
1005 + put_cred(override_cred);
1010 +static int ovl_remove_opaque(struct dentry *upperdentry)
1013 + const struct cred *old_cred;
1014 + struct cred *override_cred;
1016 + override_cred = prepare_creds();
1017 + if (!override_cred)
1020 + /* CAP_SYS_ADMIN for removexattr of "trusted" namespace */
1021 + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
1022 + old_cred = override_creds(override_cred);
1023 + err = vfs_removexattr(upperdentry, ovl_opaque_xattr);
1024 + revert_creds(old_cred);
1025 + put_cred(override_cred);
1030 +static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
1031 + struct kstat *stat)
1034 + enum ovl_path_type type;
1035 + struct path realpath;
1037 + type = ovl_path_real(dentry, &realpath);
1038 + err = vfs_getattr(realpath.mnt, realpath.dentry, stat);
1042 + stat->dev = dentry->d_sb->s_dev;
1043 + stat->ino = dentry->d_inode->i_ino;
1046 + * It's probably not worth it to count subdirs to get the
1047 + * correct link count. nlink=1 seems to pacify 'find' and
1048 + * other utilities.
1050 + if (type == OVL_PATH_MERGE)
1056 +static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
1060 + struct dentry *newdentry;
1061 + struct dentry *upperdir;
1062 + struct inode *inode;
1063 + struct kstat stat = {
1069 + inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
1073 + err = ovl_copy_up(dentry->d_parent);
1077 + upperdir = ovl_dentry_upper(dentry->d_parent);
1078 + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
1080 + newdentry = ovl_upper_create(upperdir, dentry, &stat, link);
1081 + err = PTR_ERR(newdentry);
1082 + if (IS_ERR(newdentry))
1085 + ovl_dentry_version_inc(dentry->d_parent);
1086 + if (ovl_dentry_is_opaque(dentry) && S_ISDIR(mode)) {
1087 + err = ovl_set_opaque(newdentry);
1089 + vfs_rmdir(upperdir->d_inode, newdentry);
1090 + ovl_whiteout(upperdir, dentry);
1094 + ovl_dentry_update(dentry, newdentry);
1095 + ovl_copyattr(newdentry->d_inode, inode);
1096 + d_instantiate(dentry, inode);
1104 + mutex_unlock(&upperdir->d_inode->i_mutex);
1111 +static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
1114 + return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
1117 +static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1119 + return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
1122 +static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
1125 + return ovl_create_object(dentry, mode, rdev, NULL);
1128 +static int ovl_symlink(struct inode *dir, struct dentry *dentry,
1131 + return ovl_create_object(dentry, S_IFLNK, 0, link);
1134 +static int ovl_do_remove(struct dentry *dentry, bool is_dir)
1137 + enum ovl_path_type type;
1138 + struct path realpath;
1139 + struct dentry *upperdir;
1141 + err = ovl_copy_up(dentry->d_parent);
1145 + upperdir = ovl_dentry_upper(dentry->d_parent);
1146 + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
1147 + type = ovl_path_real(dentry, &realpath);
1148 + if (type != OVL_PATH_LOWER) {
1150 + if (realpath.dentry->d_parent != upperdir)
1153 + /* FIXME: create whiteout up front and rename to target */
1156 + err = vfs_rmdir(upperdir->d_inode, realpath.dentry);
1158 + err = vfs_unlink(upperdir->d_inode, realpath.dentry);
1162 + ovl_dentry_version_inc(dentry->d_parent);
1165 + if (type != OVL_PATH_UPPER || ovl_dentry_is_opaque(dentry))
1166 + err = ovl_whiteout(upperdir, dentry);
1169 + * Keeping this dentry hashed would mean having to release
1170 + * upperpath/lowerpath, which could only be done if we are the
1171 + * sole user of this dentry. Too tricky... Just unhash for
1176 + mutex_unlock(&upperdir->d_inode->i_mutex);
1181 +static int ovl_unlink(struct inode *dir, struct dentry *dentry)
1183 + return ovl_do_remove(dentry, false);
1187 +static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
1190 + enum ovl_path_type type;
1192 + type = ovl_path_type(dentry);
1193 + if (type != OVL_PATH_UPPER) {
1194 + err = ovl_check_empty_and_clear(dentry, type);
1199 + return ovl_do_remove(dentry, true);
1202 +static int ovl_link(struct dentry *old, struct inode *newdir,
1203 + struct dentry *new)
1206 + struct dentry *olddentry;
1207 + struct dentry *newdentry;
1208 + struct dentry *upperdir;
1209 + struct inode *newinode;
1211 + err = ovl_copy_up(old);
1215 + err = ovl_copy_up(new->d_parent);
1219 + upperdir = ovl_dentry_upper(new->d_parent);
1220 + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
1221 + newdentry = ovl_lookup_create(upperdir, new);
1222 + err = PTR_ERR(newdentry);
1223 + if (IS_ERR(newdentry))
1226 + olddentry = ovl_dentry_upper(old);
1227 + err = vfs_link(olddentry, upperdir->d_inode, newdentry);
1229 + if (WARN_ON(!newdentry->d_inode)) {
1234 + newinode = ovl_new_inode(old->d_sb, newdentry->d_inode->i_mode,
1238 + ovl_copyattr(newdentry->d_inode, newinode); /* owner comes from the linked upper inode, not its parent dir */
1240 + ovl_dentry_version_inc(new->d_parent);
1241 + ovl_dentry_update(new, newdentry);
1243 + d_instantiate(new, newinode);
1246 + if (ovl_dentry_is_opaque(new))
1247 + ovl_whiteout(upperdir, new);
1251 + mutex_unlock(&upperdir->d_inode->i_mutex);
1257 +static int ovl_rename(struct inode *olddir, struct dentry *old,
1258 + struct inode *newdir, struct dentry *new)
1261 + enum ovl_path_type old_type;
1262 + enum ovl_path_type new_type;
1263 + struct dentry *old_upperdir;
1264 + struct dentry *new_upperdir;
1265 + struct dentry *olddentry;
1266 + struct dentry *newdentry;
1267 + struct dentry *trap;
1270 + bool new_create = false;
1271 + bool is_dir = S_ISDIR(old->d_inode->i_mode);
1273 + /* Don't copy up directory trees */
1274 + old_type = ovl_path_type(old);
1275 + if (old_type != OVL_PATH_UPPER && is_dir)
1278 + if (new->d_inode) {
1279 + new_type = ovl_path_type(new);
1281 + if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) {
1282 + if (ovl_dentry_lower(old)->d_inode ==
1283 + ovl_dentry_lower(new)->d_inode)
1286 + if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) {
1287 + if (ovl_dentry_upper(old)->d_inode ==
1288 + ovl_dentry_upper(new)->d_inode)
1292 + if (new_type != OVL_PATH_UPPER &&
1293 + S_ISDIR(new->d_inode->i_mode)) {
1294 + err = ovl_check_empty_and_clear(new, new_type);
1299 + new_type = OVL_PATH_UPPER;
1302 + err = ovl_copy_up(old);
1306 + err = ovl_copy_up(new->d_parent);
1310 + old_upperdir = ovl_dentry_upper(old->d_parent);
1311 + new_upperdir = ovl_dentry_upper(new->d_parent);
1313 + trap = lock_rename(new_upperdir, old_upperdir);
1315 + olddentry = ovl_dentry_upper(old);
1316 + newdentry = ovl_dentry_upper(new);
1320 + new_create = true;
1321 + newdentry = ovl_lookup_create(new_upperdir, new);
1322 + err = PTR_ERR(newdentry);
1323 + if (IS_ERR(newdentry))
1328 + if (olddentry->d_parent != old_upperdir)
1330 + if (newdentry->d_parent != new_upperdir)
1332 + if (olddentry == trap)
1334 + if (newdentry == trap)
1337 + old_opaque = ovl_dentry_is_opaque(old);
1338 + new_opaque = ovl_dentry_is_opaque(new) || new_type != OVL_PATH_UPPER;
1340 + if (is_dir && !old_opaque && new_opaque) {
1341 + err = ovl_set_opaque(olddentry);
1346 + err = vfs_rename(old_upperdir->d_inode, olddentry,
1347 + new_upperdir->d_inode, newdentry);
1350 + if (new_create && ovl_dentry_is_opaque(new))
1351 + ovl_whiteout(new_upperdir, new);
1352 + if (is_dir && !old_opaque && new_opaque)
1353 + ovl_remove_opaque(olddentry);
1357 + if (old_type != OVL_PATH_UPPER || old_opaque)
1358 + err = ovl_whiteout(old_upperdir, old);
1359 + if (is_dir && old_opaque && !new_opaque)
1360 + ovl_remove_opaque(olddentry);
1362 + if (old_opaque != new_opaque)
1363 + ovl_dentry_set_opaque(old, new_opaque);
1365 + ovl_dentry_version_inc(old->d_parent);
1366 + ovl_dentry_version_inc(new->d_parent);
1371 + unlock_rename(new_upperdir, old_upperdir);
1375 +const struct inode_operations ovl_dir_inode_operations = {
1376 + .lookup = ovl_lookup,
1377 + .mkdir = ovl_mkdir,
1378 + .symlink = ovl_symlink,
1379 + .unlink = ovl_unlink,
1380 + .rmdir = ovl_rmdir,
1381 + .rename = ovl_rename,
1383 + .setattr = ovl_setattr,
1384 + .create = ovl_create,
1385 + .mknod = ovl_mknod,
1386 + .permission = ovl_permission,
1387 + .getattr = ovl_dir_getattr,
1388 + .setxattr = ovl_setxattr,
1389 + .getxattr = ovl_getxattr,
1390 + .listxattr = ovl_listxattr,
1391 + .removexattr = ovl_removexattr,
1393 diff -Nur -x .git vfs-4cbe5a5/fs/overlayfs/inode.c vfs-3d5a648/fs/overlayfs/inode.c
1394 --- vfs-4cbe5a5/fs/overlayfs/inode.c 1970-01-01 01:00:00.000000000 +0100
1395 +++ vfs-3d5a648/fs/overlayfs/inode.c 2012-09-05 16:35:20.000000000 +0200
1399 + * Copyright (C) 2011 Novell Inc.
1401 + * This program is free software; you can redistribute it and/or modify it
1402 + * under the terms of the GNU General Public License version 2 as published by
1403 + * the Free Software Foundation.
1406 +#include <linux/fs.h>
1407 +#include <linux/slab.h>
1408 +#include <linux/xattr.h>
1409 +#include "overlayfs.h"
1411 +int ovl_setattr(struct dentry *dentry, struct iattr *attr)
1413 + struct dentry *upperdentry;
1416 + if ((attr->ia_valid & ATTR_SIZE) && !ovl_dentry_upper(dentry))
1417 + err = ovl_copy_up_truncate(dentry, attr->ia_size);
1419 + err = ovl_copy_up(dentry);
1423 + upperdentry = ovl_dentry_upper(dentry);
1425 + if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
1426 + attr->ia_valid &= ~ATTR_MODE;
1428 + mutex_lock(&upperdentry->d_inode->i_mutex);
1429 + err = notify_change(upperdentry, attr);
1431 + ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
1432 + mutex_unlock(&upperdentry->d_inode->i_mutex);
1437 +static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
1438 + struct kstat *stat)
1440 + struct path realpath;
1442 + ovl_path_real(dentry, &realpath);
1443 + return vfs_getattr(realpath.mnt, realpath.dentry, stat);
1446 +int ovl_permission(struct inode *inode, int mask)
1448 + struct ovl_entry *oe;
1449 + struct dentry *alias = NULL;
1450 + struct inode *realinode;
1451 + struct dentry *realdentry;
1455 + if (S_ISDIR(inode->i_mode)) {
1456 + oe = inode->i_private;
1457 + } else if (mask & MAY_NOT_BLOCK) {
1461 + * For non-directories find an alias and get the info
1464 + alias = d_find_any_alias(inode);
1465 + if (WARN_ON(!alias))
1468 + oe = alias->d_fsdata;
1471 + realdentry = ovl_entry_real(oe, &is_upper);
1473 + /* Careful in RCU walk mode */
1474 + realinode = ACCESS_ONCE(realdentry->d_inode);
1476 + WARN_ON(!(mask & MAY_NOT_BLOCK));
1481 + if (mask & MAY_WRITE) {
1482 + umode_t mode = realinode->i_mode;
1485 + * Writes will always be redirected to upper layer, so
1486 + * ignore lower layer being read-only.
1488 + * If the overlay itself is read-only then proceed
1489 + * with the permission check, don't return EROFS.
1490 + * This will only happen if this is the lower layer of
1491 + * another overlayfs.
1493 + * If upper fs becomes read-only after the overlay was
1494 + * constructed return EROFS to prevent modification of
1498 + if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
1499 + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
1503 + err = __inode_permission(realinode, mask);
1510 +struct ovl_link_data {
1511 + struct dentry *realdentry;
1515 +static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
1518 + struct dentry *realdentry;
1519 + struct inode *realinode;
1521 + realdentry = ovl_dentry_real(dentry);
1522 + realinode = realdentry->d_inode;
1524 + if (WARN_ON(!realinode->i_op->follow_link))
1525 + return ERR_PTR(-EPERM);
1527 + ret = realinode->i_op->follow_link(realdentry, nd);
1531 + if (realinode->i_op->put_link) {
1532 + struct ovl_link_data *data;
1534 + data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
1536 + realinode->i_op->put_link(realdentry, nd, ret);
1537 + return ERR_PTR(-ENOMEM);
1539 + data->realdentry = realdentry;
1540 + data->cookie = ret;
1548 +static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
1550 + struct inode *realinode;
1551 + struct ovl_link_data *data = c;
1556 + realinode = data->realdentry->d_inode;
1557 + realinode->i_op->put_link(data->realdentry, nd, data->cookie);
1561 +static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
1563 + struct path realpath;
1564 + struct inode *realinode;
1566 + ovl_path_real(dentry, &realpath);
1567 + realinode = realpath.dentry->d_inode;
1569 + if (!realinode->i_op->readlink)
1572 + touch_atime(&realpath);
1574 + return realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
1578 +static bool ovl_is_private_xattr(const char *name)
1580 + return strncmp(name, "trusted.overlay.", sizeof("trusted.overlay.") - 1) == 0; /* compare the whole 16-char prefix */
1583 +int ovl_setxattr(struct dentry *dentry, const char *name,
1584 + const void *value, size_t size, int flags)
1587 + struct dentry *upperdentry;
1589 + if (ovl_is_private_xattr(name))
1592 + err = ovl_copy_up(dentry);
1596 + upperdentry = ovl_dentry_upper(dentry);
1597 + return vfs_setxattr(upperdentry, name, value, size, flags);
1600 +ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
1601 + void *value, size_t size)
1603 + if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
1604 + ovl_is_private_xattr(name))
1607 + return vfs_getxattr(ovl_dentry_real(dentry), name, value, size);
1610 +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
1615 + res = vfs_listxattr(ovl_dentry_real(dentry), list, size);
1616 + if (res <= 0 || size == 0)
1619 + if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE)
1622 + /* filter out private xattrs */
1623 + for (off = 0; off < res;) {
1624 + char *s = list + off;
1625 + size_t slen = strlen(s) + 1;
1627 + BUG_ON(off + slen > res);
1629 + if (ovl_is_private_xattr(s)) {
1631 + memmove(s, s + slen, res - off);
1640 +int ovl_removexattr(struct dentry *dentry, const char *name)
1643 + struct path realpath;
1644 + enum ovl_path_type type;
1646 + if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
1647 + ovl_is_private_xattr(name))
1650 + type = ovl_path_real(dentry, &realpath);
1651 + if (type == OVL_PATH_LOWER) {
1652 + err = vfs_getxattr(realpath.dentry, name, NULL, 0);
1656 + err = ovl_copy_up(dentry);
1660 + ovl_path_upper(dentry, &realpath);
1663 + return vfs_removexattr(realpath.dentry, name);
1666 +static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
1667 + struct dentry *realdentry)
1669 + if (type != OVL_PATH_LOWER)
1672 + if (special_file(realdentry->d_inode->i_mode))
1675 + if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
1681 +static int ovl_dentry_open(struct dentry *dentry, struct file *file,
1682 + const struct cred *cred)
1685 + struct path realpath;
1686 + enum ovl_path_type type;
1688 + type = ovl_path_real(dentry, &realpath);
1689 + if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) {
1690 + if (file->f_flags & O_TRUNC)
1691 + err = ovl_copy_up_truncate(dentry, 0);
1693 + err = ovl_copy_up(dentry);
1697 + ovl_path_upper(dentry, &realpath);
1700 + return vfs_open(&realpath, file, cred);
1703 +static const struct inode_operations ovl_file_inode_operations = {
1704 + .setattr = ovl_setattr,
1705 + .permission = ovl_permission,
1706 + .getattr = ovl_getattr,
1707 + .setxattr = ovl_setxattr,
1708 + .getxattr = ovl_getxattr,
1709 + .listxattr = ovl_listxattr,
1710 + .removexattr = ovl_removexattr,
1711 + .dentry_open = ovl_dentry_open,
1714 +static const struct inode_operations ovl_symlink_inode_operations = {
1715 + .setattr = ovl_setattr,
1716 + .follow_link = ovl_follow_link,
1717 + .put_link = ovl_put_link,
1718 + .readlink = ovl_readlink,
1719 + .getattr = ovl_getattr,
1720 + .setxattr = ovl_setxattr,
1721 + .getxattr = ovl_getxattr,
1722 + .listxattr = ovl_listxattr,
1723 + .removexattr = ovl_removexattr,
1726 +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
1727 + struct ovl_entry *oe)
1729 + struct inode *inode;
1731 + inode = new_inode(sb);
1737 + inode->i_ino = get_next_ino();
1738 + inode->i_mode = mode;
1739 + inode->i_flags |= S_NOATIME | S_NOCMTIME;
1743 + inode->i_private = oe;
1744 + inode->i_op = &ovl_dir_inode_operations;
1745 + inode->i_fop = &ovl_dir_operations;
1749 + inode->i_op = &ovl_symlink_inode_operations;
1757 + inode->i_op = &ovl_file_inode_operations;
1761 + WARN(1, "illegal file type: %i\n", mode);
1769 diff -Nur -x .git vfs-4cbe5a5/fs/overlayfs/Kconfig vfs-3d5a648/fs/overlayfs/Kconfig
1770 --- vfs-4cbe5a5/fs/overlayfs/Kconfig 1970-01-01 01:00:00.000000000 +0100
1771 +++ vfs-3d5a648/fs/overlayfs/Kconfig 2012-09-05 16:35:20.000000000 +0200
1773 +config OVERLAYFS_FS
1774 + tristate "Overlay filesystem support"
1776 + Add support for overlay filesystem.
1777 diff -Nur -x .git vfs-4cbe5a5/fs/overlayfs/Makefile vfs-3d5a648/fs/overlayfs/Makefile
1778 --- vfs-4cbe5a5/fs/overlayfs/Makefile 1970-01-01 01:00:00.000000000 +0100
1779 +++ vfs-3d5a648/fs/overlayfs/Makefile 2012-09-05 16:35:20.000000000 +0200
1782 +# Makefile for the overlay filesystem.
1785 +obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o
1787 +overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o
1788 diff -Nur -x .git vfs-4cbe5a5/fs/overlayfs/overlayfs.h vfs-3d5a648/fs/overlayfs/overlayfs.h
1789 --- vfs-4cbe5a5/fs/overlayfs/overlayfs.h 1970-01-01 01:00:00.000000000 +0100
1790 +++ vfs-3d5a648/fs/overlayfs/overlayfs.h 2012-09-05 16:35:20.000000000 +0200
1794 + * Copyright (C) 2011 Novell Inc.
1796 + * This program is free software; you can redistribute it and/or modify it
1797 + * under the terms of the GNU General Public License version 2 as published by
1798 + * the Free Software Foundation.
1803 +enum ovl_path_type {
1809 +extern const char *ovl_opaque_xattr;
1810 +extern const char *ovl_whiteout_xattr;
1811 +extern const struct dentry_operations ovl_dentry_operations;
1813 +enum ovl_path_type ovl_path_type(struct dentry *dentry);
1814 +u64 ovl_dentry_version_get(struct dentry *dentry);
1815 +void ovl_dentry_version_inc(struct dentry *dentry);
1816 +void ovl_path_upper(struct dentry *dentry, struct path *path);
1817 +void ovl_path_lower(struct dentry *dentry, struct path *path);
1818 +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
1819 +struct dentry *ovl_dentry_upper(struct dentry *dentry);
1820 +struct dentry *ovl_dentry_lower(struct dentry *dentry);
1821 +struct dentry *ovl_dentry_real(struct dentry *dentry);
1822 +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
1823 +bool ovl_dentry_is_opaque(struct dentry *dentry);
1824 +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
1825 +bool ovl_is_whiteout(struct dentry *dentry);
1826 +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
1827 +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
1828 + unsigned int flags);
1829 +struct file *ovl_path_open(struct path *path, int flags);
1831 +struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
1832 + struct kstat *stat, const char *link);
1835 +extern const struct file_operations ovl_dir_operations;
1836 +int ovl_check_empty_and_clear(struct dentry *dentry, enum ovl_path_type type);
1839 +int ovl_setattr(struct dentry *dentry, struct iattr *attr);
1840 +int ovl_permission(struct inode *inode, int mask);
1841 +int ovl_setxattr(struct dentry *dentry, const char *name,
1842 + const void *value, size_t size, int flags);
1843 +ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
1844 + void *value, size_t size);
1845 +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
1846 +int ovl_removexattr(struct dentry *dentry, const char *name);
1848 +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
1849 + struct ovl_entry *oe);
1850 +static inline void ovl_copyattr(struct inode *from, struct inode *to)
1852 + to->i_uid = from->i_uid;
1853 + to->i_gid = from->i_gid;
1857 +extern const struct inode_operations ovl_dir_inode_operations;
1860 +int ovl_copy_up(struct dentry *dentry);
1861 +int ovl_copy_up_truncate(struct dentry *dentry, loff_t size);
1862 diff -Nur -x .git vfs-4cbe5a5/fs/overlayfs/readdir.c vfs-3d5a648/fs/overlayfs/readdir.c
1863 --- vfs-4cbe5a5/fs/overlayfs/readdir.c 1970-01-01 01:00:00.000000000 +0100
1864 +++ vfs-3d5a648/fs/overlayfs/readdir.c 2012-09-05 16:35:20.000000000 +0200
1868 + * Copyright (C) 2011 Novell Inc.
1870 + * This program is free software; you can redistribute it and/or modify it
1871 + * under the terms of the GNU General Public License version 2 as published by
1872 + * the Free Software Foundation.
1875 +#include <linux/fs.h>
1876 +#include <linux/slab.h>
1877 +#include <linux/namei.h>
1878 +#include <linux/file.h>
1879 +#include <linux/xattr.h>
1880 +#include <linux/rbtree.h>
1881 +#include <linux/security.h>
1882 +#include <linux/cred.h>
1883 +#include "overlayfs.h"
1885 +struct ovl_cache_entry {
1888 + unsigned int type;
1891 + struct list_head l_node;
1892 + struct rb_node node;
1895 +struct ovl_readdir_data {
1896 + struct rb_root *root;
1897 + struct list_head *list;
1898 + struct list_head *middle;
1899 + struct dentry *dir;
1904 +struct ovl_dir_file {
1907 + struct list_head cursor;
1908 + u64 cache_version;
1909 + struct list_head cache;
1910 + struct file *realfile;
1913 +static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
1915 + return container_of(n, struct ovl_cache_entry, node);
1918 +static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
1919 + const char *name, int len)
1921 + struct rb_node *node = root->rb_node;
1925 + struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
1927 + cmp = strncmp(name, p->name, len);
1929 + node = p->node.rb_right;
1930 + else if (cmp < 0 || len < p->len)
1931 + node = p->node.rb_left;
1939 +static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len,
1940 + u64 ino, unsigned int d_type)
1942 + struct ovl_cache_entry *p;
1944 + p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL);
1946 + char *name_copy = (char *) (p + 1);
1947 + memcpy(name_copy, name, len);
1948 + name_copy[len] = '\0';
1949 + p->name = name_copy;
1953 + p->is_whiteout = false;
1959 +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
1960 + const char *name, int len, u64 ino,
1961 + unsigned int d_type)
1963 + struct rb_node **newp = &rdd->root->rb_node;
1964 + struct rb_node *parent = NULL;
1965 + struct ovl_cache_entry *p;
1969 + struct ovl_cache_entry *tmp;
1972 + tmp = ovl_cache_entry_from_node(*newp);
1973 + cmp = strncmp(name, tmp->name, len);
1975 + newp = &tmp->node.rb_right;
1976 + else if (cmp < 0 || len < tmp->len)
1977 + newp = &tmp->node.rb_left;
1982 + p = ovl_cache_entry_new(name, len, ino, d_type);
1986 + list_add_tail(&p->l_node, rdd->list);
1987 + rb_link_node(&p->node, parent, newp);
1988 + rb_insert_color(&p->node, rdd->root);
1993 +static int ovl_fill_lower(void *buf, const char *name, int namelen,
1994 + loff_t offset, u64 ino, unsigned int d_type)
1996 + struct ovl_readdir_data *rdd = buf;
1997 + struct ovl_cache_entry *p;
2000 + p = ovl_cache_entry_find(rdd->root, name, namelen);
2002 + list_move_tail(&p->l_node, rdd->middle);
2004 + p = ovl_cache_entry_new(name, namelen, ino, d_type);
2006 + rdd->err = -ENOMEM;
2008 + list_add_tail(&p->l_node, rdd->middle);
2014 +static void ovl_cache_free(struct list_head *list)
2016 + struct ovl_cache_entry *p;
2017 + struct ovl_cache_entry *n;
2019 + list_for_each_entry_safe(p, n, list, l_node)
2022 + INIT_LIST_HEAD(list);
2025 +static int ovl_fill_upper(void *buf, const char *name, int namelen,
2026 + loff_t offset, u64 ino, unsigned int d_type)
2028 + struct ovl_readdir_data *rdd = buf;
2031 + return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
2034 +static inline int ovl_dir_read(struct path *realpath,
2035 + struct ovl_readdir_data *rdd, filldir_t filler)
2037 + struct file *realfile;
2040 + realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
2041 + if (IS_ERR(realfile))
2042 + return PTR_ERR(realfile);
2047 + err = vfs_readdir(realfile, filler, rdd);
2050 + } while (!err && rdd->count);
2056 +static void ovl_dir_reset(struct file *file)
2058 + struct ovl_dir_file *od = file->private_data;
2059 + enum ovl_path_type type = ovl_path_type(file->f_path.dentry);
2061 + if (ovl_dentry_version_get(file->f_path.dentry) != od->cache_version) {
2062 + list_del_init(&od->cursor);
2063 + ovl_cache_free(&od->cache);
2064 + od->is_cached = false;
2066 + WARN_ON(!od->is_real && type != OVL_PATH_MERGE);
2067 + if (od->is_real && type == OVL_PATH_MERGE) {
2068 + fput(od->realfile);
2069 + od->realfile = NULL;
2070 + od->is_real = false;
2074 +static int ovl_dir_mark_whiteouts(struct ovl_readdir_data *rdd)
2076 + struct ovl_cache_entry *p;
2077 + struct dentry *dentry;
2078 + const struct cred *old_cred;
2079 + struct cred *override_cred;
2081 + override_cred = prepare_creds();
2082 + if (!override_cred) {
2083 + ovl_cache_free(rdd->list);
2088 + * CAP_SYS_ADMIN for getxattr
2089 + * CAP_DAC_OVERRIDE for lookup
2091 + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
2092 + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
2093 + old_cred = override_creds(override_cred);
2095 + mutex_lock(&rdd->dir->d_inode->i_mutex);
2096 + list_for_each_entry(p, rdd->list, l_node) {
2097 + if (p->type != DT_LNK)
2100 + dentry = lookup_one_len(p->name, rdd->dir, p->len);
2101 + if (IS_ERR(dentry))
2104 + p->is_whiteout = ovl_is_whiteout(dentry);
2107 + mutex_unlock(&rdd->dir->d_inode->i_mutex);
2109 + revert_creds(old_cred);
2110 + put_cred(override_cred);
2115 +static inline int ovl_dir_read_merged(struct path *upperpath,
2116 + struct path *lowerpath,
2117 + struct ovl_readdir_data *rdd)
2120 + struct rb_root root = RB_ROOT;
2121 + struct list_head middle;
2123 + rdd->root = &root;
2124 + if (upperpath->dentry) {
2125 + rdd->dir = upperpath->dentry;
2126 + err = ovl_dir_read(upperpath, rdd, ovl_fill_upper);
2130 + err = ovl_dir_mark_whiteouts(rdd);
2135 + * Insert lowerpath entries before upperpath ones, this allows
2136 + * offsets to be reasonably constant
2138 + list_add(&middle, rdd->list);
2139 + rdd->middle = &middle;
2140 + err = ovl_dir_read(lowerpath, rdd, ovl_fill_lower);
2141 + list_del(&middle);
2148 +static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
2150 + struct list_head *l;
2153 + l = od->cache.next;
2154 + for (off = 0; off < pos; off++) {
2155 + if (l == &od->cache)
2159 + list_move_tail(&od->cursor, l);
2162 +static int ovl_readdir(struct file *file, void *buf, filldir_t filler)
2164 + struct ovl_dir_file *od = file->private_data;
2168 + ovl_dir_reset(file);
2170 + if (od->is_real) {
2171 + res = vfs_readdir(od->realfile, filler, buf);
2172 + file->f_pos = od->realfile->f_pos;
2177 + if (!od->is_cached) {
2178 + struct path lowerpath;
2179 + struct path upperpath;
2180 + struct ovl_readdir_data rdd = { .list = &od->cache };
2182 + ovl_path_lower(file->f_path.dentry, &lowerpath);
2183 + ovl_path_upper(file->f_path.dentry, &upperpath);
2185 + res = ovl_dir_read_merged(&upperpath, &lowerpath, &rdd);
2187 + ovl_cache_free(rdd.list);
2191 + od->cache_version = ovl_dentry_version_get(file->f_path.dentry);
2192 + od->is_cached = true;
2194 + ovl_seek_cursor(od, file->f_pos);
2197 + while (od->cursor.next != &od->cache) {
2200 + struct ovl_cache_entry *p;
2202 + p = list_entry(od->cursor.next, struct ovl_cache_entry, l_node);
2203 + off = file->f_pos;
2204 + if (!p->is_whiteout) {
2205 + over = filler(buf, p->name, p->len, off, p->ino,
2211 + list_move(&od->cursor, &p->l_node);
2217 +static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
2220 + struct ovl_dir_file *od = file->private_data;
2222 + mutex_lock(&file->f_dentry->d_inode->i_mutex);
2224 + ovl_dir_reset(file);
2226 + if (od->is_real) {
2227 + res = vfs_llseek(od->realfile, offset, origin);
2228 + file->f_pos = od->realfile->f_pos;
2234 + offset += file->f_pos;
2244 + if (offset != file->f_pos) {
2245 + file->f_pos = offset;
2246 + if (od->is_cached)
2247 + ovl_seek_cursor(od, offset);
2252 + mutex_unlock(&file->f_dentry->d_inode->i_mutex);
2257 +static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
2260 + struct ovl_dir_file *od = file->private_data;
2262 + /* May need to reopen directory if it got copied up */
2263 + if (!od->realfile) {
2264 + struct path upperpath;
2266 + ovl_path_upper(file->f_path.dentry, &upperpath);
2267 + od->realfile = ovl_path_open(&upperpath, O_RDONLY);
2268 + if (IS_ERR(od->realfile))
2269 + return PTR_ERR(od->realfile);
2272 + return vfs_fsync_range(od->realfile, start, end, datasync);
2275 +static int ovl_dir_release(struct inode *inode, struct file *file)
2277 + struct ovl_dir_file *od = file->private_data;
2279 + list_del(&od->cursor);
2280 + ovl_cache_free(&od->cache);
2282 + fput(od->realfile);
2288 +static int ovl_dir_open(struct inode *inode, struct file *file)
2290 + struct path realpath;
2291 + struct file *realfile;
2292 + struct ovl_dir_file *od;
2293 + enum ovl_path_type type;
2295 + od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
2299 + type = ovl_path_real(file->f_path.dentry, &realpath);
2300 + realfile = ovl_path_open(&realpath, file->f_flags);
2301 + if (IS_ERR(realfile)) {
2303 + return PTR_ERR(realfile);
2305 + INIT_LIST_HEAD(&od->cache);
2306 + INIT_LIST_HEAD(&od->cursor);
2307 + od->is_cached = false;
2308 + od->realfile = realfile;
2309 + od->is_real = (type != OVL_PATH_MERGE);
2310 + file->private_data = od;
2315 +const struct file_operations ovl_dir_operations = {
2316 + .read = generic_read_dir,
2317 + .open = ovl_dir_open,
2318 + .readdir = ovl_readdir,
2319 + .llseek = ovl_dir_llseek,
2320 + .fsync = ovl_dir_fsync,
2321 + .release = ovl_dir_release,
2324 +static int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
2327 + struct path lowerpath;
2328 + struct path upperpath;
2329 + struct ovl_cache_entry *p;
2330 + struct ovl_readdir_data rdd = { .list = list };
2332 + ovl_path_upper(dentry, &upperpath);
2333 + ovl_path_lower(dentry, &lowerpath);
2335 + err = ovl_dir_read_merged(&upperpath, &lowerpath, &rdd);
2341 + list_for_each_entry(p, list, l_node) {
2342 + if (p->is_whiteout)
2345 + if (p->name[0] == '.') {
2348 + if (p->len == 2 && p->name[1] == '.')
2358 +static int ovl_remove_whiteouts(struct dentry *dir, struct list_head *list)
2360 + struct path upperpath;
2361 + struct dentry *upperdir;
2362 + struct ovl_cache_entry *p;
2363 + const struct cred *old_cred;
2364 + struct cred *override_cred;
2367 + ovl_path_upper(dir, &upperpath);
2368 + upperdir = upperpath.dentry;
2370 + override_cred = prepare_creds();
2371 + if (!override_cred)
2375 + * CAP_DAC_OVERRIDE for lookup and unlink
2376 + * CAP_SYS_ADMIN for setxattr of "trusted" namespace
2377 + * CAP_FOWNER for unlink in sticky directory
2379 + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
2380 + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
2381 + cap_raise(override_cred->cap_effective, CAP_FOWNER);
2382 + old_cred = override_creds(override_cred);
2384 + err = vfs_setxattr(upperdir, ovl_opaque_xattr, "y", 1, 0);
2386 + goto out_revert_creds;
2388 + mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
2389 + list_for_each_entry(p, list, l_node) {
2390 + struct dentry *dentry;
2393 + if (!p->is_whiteout)
2396 + dentry = lookup_one_len(p->name, upperdir, p->len);
2397 + if (IS_ERR(dentry)) {
2398 + printk(KERN_WARNING
2399 + "overlayfs: failed to lookup whiteout %.*s: %li\n",
2400 + p->len, p->name, PTR_ERR(dentry));
2403 + ret = vfs_unlink(upperdir->d_inode, dentry);
2406 + printk(KERN_WARNING
2407 + "overlayfs: failed to unlink whiteout %.*s: %i\n",
2408 + p->len, p->name, ret);
2410 + mutex_unlock(&upperdir->d_inode->i_mutex);
2413 + revert_creds(old_cred);
2414 + put_cred(override_cred);
2419 +int ovl_check_empty_and_clear(struct dentry *dentry, enum ovl_path_type type)
2424 + err = ovl_check_empty_dir(dentry, &list);
2425 + if (!err && type == OVL_PATH_MERGE)
2426 + err = ovl_remove_whiteouts(dentry, &list);
2428 + ovl_cache_free(&list);
2432 diff -Nur -x .git vfs-4cbe5a5/fs/overlayfs/super.c vfs-3d5a648/fs/overlayfs/super.c
2433 --- vfs-4cbe5a5/fs/overlayfs/super.c 1970-01-01 01:00:00.000000000 +0100
2434 +++ vfs-3d5a648/fs/overlayfs/super.c 2012-09-05 16:35:20.000000000 +0200
2438 + * Copyright (C) 2011 Novell Inc.
2440 + * This program is free software; you can redistribute it and/or modify it
2441 + * under the terms of the GNU General Public License version 2 as published by
2442 + * the Free Software Foundation.
2445 +#include <linux/fs.h>
2446 +#include <linux/namei.h>
2447 +#include <linux/xattr.h>
2448 +#include <linux/security.h>
2449 +#include <linux/mount.h>
2450 +#include <linux/slab.h>
2451 +#include <linux/parser.h>
2452 +#include <linux/module.h>
2453 +#include <linux/cred.h>
2454 +#include <linux/sched.h>
2455 +#include <linux/statfs.h>
2456 +#include <linux/seq_file.h>
2457 +#include "overlayfs.h"
2459 +MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
2460 +MODULE_DESCRIPTION("Overlay filesystem");
2461 +MODULE_LICENSE("GPL");
2463 +#define OVERLAYFS_SUPER_MAGIC 0x794c764f
2465 +struct ovl_config {
2470 +/* private information held for overlayfs's superblock */
2472 + struct vfsmount *upper_mnt;
2473 + struct vfsmount *lower_mnt;
2474 + long lower_namelen;
2475 + /* pathnames of lower and upper dirs, for show_options */
2476 + struct ovl_config config;
2479 +/* private information held for every overlayfs dentry */
2482 + * Keep "double reference" on upper dentries, so that
2483 + * d_delete() doesn't think it's OK to reset d_inode to NULL.
2485 + struct dentry *__upperdentry;
2486 + struct dentry *lowerdentry;
2492 + struct rcu_head rcu;
2496 +const char *ovl_whiteout_xattr = "trusted.overlay.whiteout";
2497 +const char *ovl_opaque_xattr = "trusted.overlay.opaque";
2500 +enum ovl_path_type ovl_path_type(struct dentry *dentry)
2502 + struct ovl_entry *oe = dentry->d_fsdata;
2504 + if (oe->__upperdentry) {
2505 + if (oe->lowerdentry && S_ISDIR(dentry->d_inode->i_mode))
2506 + return OVL_PATH_MERGE;
2508 + return OVL_PATH_UPPER;
2510 + return OVL_PATH_LOWER;
2514 +static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
2516 + struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry);
2517 + smp_read_barrier_depends();
2518 + return upperdentry;
2521 +void ovl_path_upper(struct dentry *dentry, struct path *path)
2523 + struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
2524 + struct ovl_entry *oe = dentry->d_fsdata;
2526 + path->mnt = ofs->upper_mnt;
2527 + path->dentry = ovl_upperdentry_dereference(oe);
2530 +void ovl_path_lower(struct dentry *dentry, struct path *path)
2532 + struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
2533 + struct ovl_entry *oe = dentry->d_fsdata;
2535 + path->mnt = ofs->lower_mnt;
2536 + path->dentry = oe->lowerdentry;
2539 +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
2542 + enum ovl_path_type type = ovl_path_type(dentry);
2544 + if (type == OVL_PATH_LOWER)
2545 + ovl_path_lower(dentry, path);
2547 + ovl_path_upper(dentry, path);
2552 +struct dentry *ovl_dentry_upper(struct dentry *dentry)
2554 + struct ovl_entry *oe = dentry->d_fsdata;
2556 + return ovl_upperdentry_dereference(oe);
2559 +struct dentry *ovl_dentry_lower(struct dentry *dentry)
2561 + struct ovl_entry *oe = dentry->d_fsdata;
2563 + return oe->lowerdentry;
2566 +struct dentry *ovl_dentry_real(struct dentry *dentry)
2568 + struct ovl_entry *oe = dentry->d_fsdata;
2569 + struct dentry *realdentry;
2571 + realdentry = ovl_upperdentry_dereference(oe);
2573 + realdentry = oe->lowerdentry;
2575 + return realdentry;
2578 +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper)
2580 + struct dentry *realdentry;
2582 + realdentry = ovl_upperdentry_dereference(oe);
2586 + realdentry = oe->lowerdentry;
2587 + *is_upper = false;
2589 + return realdentry;
2592 +bool ovl_dentry_is_opaque(struct dentry *dentry)
2594 + struct ovl_entry *oe = dentry->d_fsdata;
2595 + return oe->opaque;
2598 +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque)
2600 + struct ovl_entry *oe = dentry->d_fsdata;
2601 + oe->opaque = opaque;
2604 +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) /* Attach upperdentry to the overlay entry of dentry. */
2606 + struct ovl_entry *oe = dentry->d_fsdata;
2608 + WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex)); /* warn unless the upper parent's i_mutex is locked */
2609 + WARN_ON(oe->__upperdentry); /* warn if an upper dentry is already set */
2610 + BUG_ON(!upperdentry->d_inode); /* a negative upper dentry is treated as fatal */
2612 + oe->__upperdentry = dget(upperdentry); /* store the pointer, taking a reference on it */
2615 +void ovl_dentry_version_inc(struct dentry *dentry)
2617 + struct ovl_entry *oe = dentry->d_fsdata;
2619 + WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
2623 +u64 ovl_dentry_version_get(struct dentry *dentry)
2625 + struct ovl_entry *oe = dentry->d_fsdata;
2627 + WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
2628 + return oe->version;
2631 +bool ovl_is_whiteout(struct dentry *dentry)
2638 + if (!dentry->d_inode)
2640 + if (!S_ISLNK(dentry->d_inode->i_mode))
2643 + res = vfs_getxattr(dentry, ovl_whiteout_xattr, &val, 1);
2644 + if (res == 1 && val == 'y')
2650 +static bool ovl_is_opaquedir(struct dentry *dentry)
2655 + if (!S_ISDIR(dentry->d_inode->i_mode))
2658 + res = vfs_getxattr(dentry, ovl_opaque_xattr, &val, 1);
2659 + if (res == 1 && val == 'y')
2665 +static void ovl_entry_free(struct rcu_head *head)
2667 + struct ovl_entry *oe = container_of(head, struct ovl_entry, rcu);
2671 +static void ovl_dentry_release(struct dentry *dentry) /* ->d_release: drop the real dentries and free the overlay entry. */
2673 + struct ovl_entry *oe = dentry->d_fsdata;
2676 + dput(oe->__upperdentry);
2677 + dput(oe->__upperdentry); /* NOTE(review): 2nd put presumably balances the dget() taken wherever __upperdentry is assigned - confirm */
2678 + dput(oe->lowerdentry);
2679 + call_rcu(&oe->rcu, ovl_entry_free); /* ovl_entry_free runs as the RCU callback for oe */
2683 +const struct dentry_operations ovl_dentry_operations = {
2684 + .d_release = ovl_dentry_release,
2687 +static struct ovl_entry *ovl_alloc_entry(void)
2689 + return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL);
2692 +static inline struct dentry *ovl_lookup_real(struct dentry *dir,
2693 + struct qstr *name)
2695 + struct dentry *dentry;
2697 + mutex_lock(&dir->d_inode->i_mutex);
2698 + dentry = lookup_one_len(name->name, dir, name->len);
2699 + mutex_unlock(&dir->d_inode->i_mutex);
2701 + if (IS_ERR(dentry)) {
2702 + if (PTR_ERR(dentry) == -ENOENT)
2704 + } else if (!dentry->d_inode) {
2711 +static int ovl_do_lookup(struct dentry *dentry)
2713 + struct ovl_entry *oe;
2714 + struct dentry *upperdir;
2715 + struct dentry *lowerdir;
2716 + struct dentry *upperdentry = NULL;
2717 + struct dentry *lowerdentry = NULL;
2718 + struct inode *inode = NULL;
2722 + oe = ovl_alloc_entry();
2726 + upperdir = ovl_dentry_upper(dentry->d_parent);
2727 + lowerdir = ovl_dentry_lower(dentry->d_parent);
2730 + upperdentry = ovl_lookup_real(upperdir, &dentry->d_name);
2731 + err = PTR_ERR(upperdentry);
2732 + if (IS_ERR(upperdentry))
2735 + if (lowerdir && upperdentry &&
2736 + (S_ISLNK(upperdentry->d_inode->i_mode) ||
2737 + S_ISDIR(upperdentry->d_inode->i_mode))) {
2738 + const struct cred *old_cred;
2739 + struct cred *override_cred;
2742 + override_cred = prepare_creds();
2743 + if (!override_cred)
2744 + goto out_dput_upper;
2746 + /* CAP_SYS_ADMIN needed for getxattr */
2747 + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
2748 + old_cred = override_creds(override_cred);
2750 + if (ovl_is_opaquedir(upperdentry)) {
2751 + oe->opaque = true;
2752 + } else if (ovl_is_whiteout(upperdentry)) {
2753 + dput(upperdentry);
2754 + upperdentry = NULL;
2755 + oe->opaque = true;
2757 + revert_creds(old_cred);
2758 + put_cred(override_cred);
2761 + if (lowerdir && !oe->opaque) {
2762 + lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name);
2763 + err = PTR_ERR(lowerdentry);
2764 + if (IS_ERR(lowerdentry))
2765 + goto out_dput_upper;
2768 + if (lowerdentry && upperdentry &&
2769 + (!S_ISDIR(upperdentry->d_inode->i_mode) ||
2770 + !S_ISDIR(lowerdentry->d_inode->i_mode))) {
2771 + dput(lowerdentry);
2772 + lowerdentry = NULL;
2773 + oe->opaque = true;
2776 + if (lowerdentry || upperdentry) {
2777 + struct dentry *realdentry;
2779 + realdentry = upperdentry ? upperdentry : lowerdentry;
2781 + inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode,
2785 + ovl_copyattr(realdentry->d_inode, inode);
2789 + oe->__upperdentry = dget(upperdentry);
2792 + oe->lowerdentry = lowerdentry;
2794 + dentry->d_fsdata = oe;
2795 + dentry->d_op = &ovl_dentry_operations;
2796 + d_add(dentry, inode);
2801 + dput(lowerdentry);
2803 + dput(upperdentry);
2810 +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
2811 + unsigned int flags)
2813 + int err = ovl_do_lookup(dentry);
2816 + return ERR_PTR(err);
2821 +struct file *ovl_path_open(struct path *path, int flags)
2824 + return dentry_open(path, flags, current_cred());
2827 +static void ovl_put_super(struct super_block *sb) /* ->put_super: release what the overlay holds on its two layers. */
2829 + struct ovl_fs *ufs = sb->s_fs_info;
2831 + if (!(sb->s_flags & MS_RDONLY))
2832 + mnt_drop_write(ufs->upper_mnt); /* write access on the upper mount is held only while the overlay is read-write */
2834 + mntput(ufs->upper_mnt); /* upper_mnt and lower_mnt are the clone_private_mount() results from ovl_fill_super */
2835 + mntput(ufs->lower_mnt);
2837 + kfree(ufs->config.lowerdir); /* option strings allocated by match_strdup() in ovl_parse_opt */
2838 + kfree(ufs->config.upperdir);
2842 +static int ovl_remount_fs(struct super_block *sb, int *flagsp, char *data)
2844 + int flags = *flagsp;
2845 + struct ovl_fs *ufs = sb->s_fs_info;
2847 + /* When remounting rw or ro, we need to adjust the write access to the
2850 + if (((flags ^ sb->s_flags) & MS_RDONLY) == 0)
2851 + /* No change to readonly status */
2854 + if (flags & MS_RDONLY) {
2855 + mnt_drop_write(ufs->upper_mnt);
2858 + return mnt_want_write(ufs->upper_mnt);
2863 + * @dentry: A dentry on the overlayfs super block
2864 + * @buf: The struct kstatfs to fill in with stats
2866 + * Get the filesystem statistics. As writes always target the upper layer
2867 + * filesystem, pass the statfs to the same filesystem.
2869 +static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
2871 + struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
2872 + struct dentry *root_dentry = dentry->d_sb->s_root;
2876 + ovl_path_upper(root_dentry, &path);
2878 + err = vfs_statfs(&path, buf);
2880 + buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen);
2881 + buf->f_type = OVERLAYFS_SUPER_MAGIC;
2888 + * ovl_show_options
2890 + * Prints the mount options for a given superblock.
2891 + * Returns zero; does not fail.
2893 +static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
2895 + struct super_block *sb = dentry->d_sb;
2896 + struct ovl_fs *ufs = sb->s_fs_info;
2898 + seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir);
2899 + seq_printf(m, ",upperdir=%s", ufs->config.upperdir);
2903 +static const struct super_operations ovl_super_operations = {
2904 + .put_super = ovl_put_super,
2905 + .remount_fs = ovl_remount_fs,
2906 + .statfs = ovl_statfs,
2907 + .show_options = ovl_show_options,
2916 +static const match_table_t ovl_tokens = {
2917 + {Opt_lowerdir, "lowerdir=%s"},
2918 + {Opt_upperdir, "upperdir=%s"},
2922 +static int ovl_parse_opt(char *opt, struct ovl_config *config)
2926 + config->upperdir = NULL;
2927 + config->lowerdir = NULL;
2929 + while ((p = strsep(&opt, ",")) != NULL) {
2931 + substring_t args[MAX_OPT_ARGS];
2936 + token = match_token(p, ovl_tokens, args);
2938 + case Opt_upperdir:
2939 + kfree(config->upperdir);
2940 + config->upperdir = match_strdup(&args[0]);
2941 + if (!config->upperdir)
2945 + case Opt_lowerdir:
2946 + kfree(config->lowerdir);
2947 + config->lowerdir = match_strdup(&args[0]);
2948 + if (!config->lowerdir)
2959 +static int ovl_fill_super(struct super_block *sb, void *data, int silent)
2961 + struct path lowerpath;
2962 + struct path upperpath;
2963 + struct inode *root_inode;
2964 + struct dentry *root_dentry;
2965 + struct ovl_entry *oe;
2966 + struct ovl_fs *ufs;
2967 + struct kstatfs statfs;
2971 + ufs = kmalloc(sizeof(struct ovl_fs), GFP_KERNEL);
2975 + err = ovl_parse_opt((char *) data, &ufs->config);
2977 + goto out_free_ufs;
2980 + if (!ufs->config.upperdir || !ufs->config.lowerdir) {
2981 + printk(KERN_ERR "overlayfs: missing upperdir or lowerdir\n");
2982 + goto out_free_config;
2985 + oe = ovl_alloc_entry();
2987 + goto out_free_config;
2989 + err = kern_path(ufs->config.upperdir, LOOKUP_FOLLOW, &upperpath);
2993 + err = kern_path(ufs->config.lowerdir, LOOKUP_FOLLOW, &lowerpath);
2995 + goto out_put_upperpath;
2998 + if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) ||
2999 + !S_ISDIR(lowerpath.dentry->d_inode->i_mode))
3000 + goto out_put_lowerpath;
3002 + err = vfs_statfs(&lowerpath, &statfs);
3004 + printk(KERN_ERR "overlayfs: statfs failed on lowerpath\n");
3005 + goto out_put_lowerpath;
3007 + ufs->lower_namelen = statfs.f_namelen;
3009 + sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth,
3010 + lowerpath.mnt->mnt_sb->s_stack_depth) + 1;
3013 + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
3014 + printk(KERN_ERR "overlayfs: maximum fs stacking depth exceeded\n");
3015 + goto out_put_lowerpath;
3019 + ufs->upper_mnt = clone_private_mount(&upperpath);
3020 + err = PTR_ERR(ufs->upper_mnt);
3021 + if (IS_ERR(ufs->upper_mnt)) {
3022 + printk(KERN_ERR "overlayfs: failed to clone upperpath\n");
3023 + goto out_put_lowerpath;
3026 + ufs->lower_mnt = clone_private_mount(&lowerpath);
3027 + err = PTR_ERR(ufs->lower_mnt);
3028 + if (IS_ERR(ufs->lower_mnt)) {
3029 + printk(KERN_ERR "overlayfs: failed to clone lowerpath\n");
3030 + goto out_put_upper_mnt;
3034 + * Make lower_mnt R/O. That way fchmod/fchown on lower file
3035 + * will fail instead of modifying lower fs.
3037 + ufs->lower_mnt->mnt_flags |= MNT_READONLY;
3039 + /* If the upper fs is r/o, we mark overlayfs r/o too */
3040 + if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)
3041 + sb->s_flags |= MS_RDONLY;
3043 + if (!(sb->s_flags & MS_RDONLY)) {
3044 + err = mnt_want_write(ufs->upper_mnt);
3046 + goto out_put_lower_mnt;
3050 + root_inode = ovl_new_inode(sb, S_IFDIR, oe);
3052 + goto out_drop_write;
3054 + root_dentry = d_make_root(root_inode);
3056 + goto out_drop_write;
3058 + mntput(upperpath.mnt);
3059 + mntput(lowerpath.mnt);
3061 + oe->__upperdentry = dget(upperpath.dentry);
3062 + oe->lowerdentry = lowerpath.dentry;
3064 + root_dentry->d_fsdata = oe;
3065 + root_dentry->d_op = &ovl_dentry_operations;
3067 + sb->s_magic = OVERLAYFS_SUPER_MAGIC;
3068 + sb->s_op = &ovl_super_operations;
3069 + sb->s_root = root_dentry;
3070 + sb->s_fs_info = ufs;
3075 + if (!(sb->s_flags & MS_RDONLY))
3076 + mnt_drop_write(ufs->upper_mnt);
3078 + mntput(ufs->lower_mnt);
3080 + mntput(ufs->upper_mnt);
3082 + path_put(&lowerpath);
3084 + path_put(&upperpath);
3088 + kfree(ufs->config.lowerdir);
3089 + kfree(ufs->config.upperdir);
3096 +static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
3097 + const char *dev_name, void *raw_data)
3099 + return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
3102 +static struct file_system_type ovl_fs_type = {
3103 + .owner = THIS_MODULE,
3104 + .name = "overlayfs",
3105 + .mount = ovl_mount,
3106 + .kill_sb = kill_anon_super,
3109 +static int __init ovl_init(void)
3111 + return register_filesystem(&ovl_fs_type);
3114 +static void __exit ovl_exit(void)
3116 + unregister_filesystem(&ovl_fs_type);
3119 +module_init(ovl_init);
3120 +module_exit(ovl_exit);
3121 diff -Nur -x .git vfs-4cbe5a5/fs/splice.c vfs-3d5a648/fs/splice.c
3122 --- vfs-4cbe5a5/fs/splice.c 2012-09-01 19:39:58.000000000 +0200
3123 +++ vfs-3d5a648/fs/splice.c 2012-09-05 16:35:20.000000000 +0200
3124 @@ -1308,6 +1308,7 @@
3128 +EXPORT_SYMBOL(do_splice_direct);
3130 static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
3131 struct pipe_inode_info *opipe,
3132 diff -Nur -x .git vfs-4cbe5a5/include/linux/fs.h vfs-3d5a648/include/linux/fs.h
3133 --- vfs-4cbe5a5/include/linux/fs.h 2012-09-01 19:39:58.000000000 +0200
3134 +++ vfs-3d5a648/include/linux/fs.h 2012-09-05 16:35:20.000000000 +0200
3135 @@ -505,6 +505,12 @@
3137 #include <linux/quota.h>
3140 + * Maximum number of layers of fs stack. Needs to be limited to
3141 + * prevent kernel stack overflow
3143 +#define FILESYSTEM_MAX_STACK_DEPTH 2
3146 * enum positive_aop_returns - aop return codes with specific semantics
3148 @@ -1578,6 +1584,11 @@
3150 /* Being remounted read-only */
3151 int s_readonly_remount;
3154 + * Indicates how deep in a filesystem stack this SB is
3156 + int s_stack_depth;
3159 /* superblock cache pruning functions */
3160 @@ -1835,6 +1846,7 @@
3161 int (*atomic_open)(struct inode *, struct dentry *,
3162 struct file *, unsigned open_flag,
3163 umode_t create_mode, int *opened);
3164 + int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
3165 } ____cacheline_aligned;
3168 @@ -2199,6 +2211,7 @@
3169 extern struct file *filp_open(const char *, int, umode_t);
3170 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
3172 +extern int vfs_open(const struct path *, struct file *, const struct cred *);
3173 extern struct file * dentry_open(const struct path *, int, const struct cred *);
3174 extern int filp_close(struct file *, fl_owner_t id);
3175 extern char * getname(const char __user *);
3176 @@ -2402,6 +2415,7 @@
3178 extern int notify_change(struct dentry *, struct iattr *);
3179 extern int inode_permission(struct inode *, int);
3180 +extern int __inode_permission(struct inode *, int);
3181 extern int generic_permission(struct inode *, int);
3183 static inline bool execute_ok(struct inode *inode)
3184 diff -Nur -x .git vfs-4cbe5a5/include/linux/mount.h vfs-3d5a648/include/linux/mount.h
3185 --- vfs-4cbe5a5/include/linux/mount.h 2012-09-01 19:39:58.000000000 +0200
3186 +++ vfs-3d5a648/include/linux/mount.h 2012-09-05 16:35:20.000000000 +0200
3188 extern void mnt_unpin(struct vfsmount *mnt);
3189 extern int __mnt_is_readonly(struct vfsmount *mnt);
3192 +extern struct vfsmount *clone_private_mount(struct path *path);
3194 struct file_system_type;
3195 extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
3196 int flags, const char *name,
3197 diff -Nur -x .git vfs-4cbe5a5/MAINTAINERS vfs-3d5a648/MAINTAINERS
3198 --- vfs-4cbe5a5/MAINTAINERS 2012-09-01 19:39:58.000000000 +0200
3199 +++ vfs-3d5a648/MAINTAINERS 2012-09-05 16:35:20.000000000 +0200
3200 @@ -5103,6 +5103,13 @@
3201 F: include/scsi/osd_*
3204 +OVERLAYFS FILESYSTEM
3205 +M: Miklos Szeredi <miklos@szeredi.hu>
3206 +L: linux-fsdevel@vger.kernel.org
3209 +F: Documentation/filesystems/overlayfs.txt
3212 M: Christian Lamparter <chunkeey@googlemail.com>
3213 L: linux-wireless@vger.kernel.org