4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
/*
 * Map the POSIX open(2) access-mode bits to SMB/CIFS desired-access bits.
 * NOTE(review): this extract omits lines (the RDONLY/WRONLY return values
 * and closing braces are not visible) - confirm against the full source.
 */
47 static inline int cifs_convert_flags(unsigned int flags)
49 if ((flags & O_ACCMODE) == O_RDONLY)
51 else if ((flags & O_ACCMODE) == O_WRONLY)
53 else if ((flags & O_ACCMODE) == O_RDWR) {
54 /* GENERIC_ALL is too much permission to request
55 can cause unnecessary access denied on create */
56 /* return GENERIC_ALL; */
57 return (GENERIC_READ | GENERIC_WRITE);
/* fallback: a conservative union of attribute/data rights */
60 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Translate POSIX open(2) flags into SMB_O_* bits for the POSIX-extensions
 * open call. O_EXCL without O_CREAT is ignored (with a debug message), per
 * POSIX being undefined for that combination.
 * NOTE(review): extract omits some condition lines (e.g. the O_TRUNC and
 * O_SYNC/O_DSYNC checks before lines 85/88) - verify against full file.
 */
65 static u32 cifs_posix_convert_flags(unsigned int flags)
69 if ((flags & O_ACCMODE) == O_RDONLY)
70 posix_flags = SMB_O_RDONLY;
71 else if ((flags & O_ACCMODE) == O_WRONLY)
72 posix_flags = SMB_O_WRONLY;
73 else if ((flags & O_ACCMODE) == O_RDWR)
74 posix_flags = SMB_O_RDWR;
76 if (flags & O_CREAT) {
77 posix_flags |= SMB_O_CREAT;
79 posix_flags |= SMB_O_EXCL;
80 } else if (flags & O_EXCL)
81 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82 current->comm, current->tgid);
85 posix_flags |= SMB_O_TRUNC;
86 /* be safe and imply O_SYNC for O_DSYNC */
88 posix_flags |= SMB_O_SYNC;
89 if (flags & O_DIRECTORY)
90 posix_flags |= SMB_O_DIRECTORY;
91 if (flags & O_NOFOLLOW)
92 posix_flags |= SMB_O_NOFOLLOW;
94 posix_flags |= SMB_O_DIRECT;
/*
 * Map POSIX create/truncate flags to an SMB create disposition; see the
 * mapping table comment in cifs_nt_open(). The "none of the above" case
 * (FILE_OPEN) is outside this extract.
 */
99 static inline int cifs_get_disposition(unsigned int flags)
101 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104 return FILE_OVERWRITE_IF;
105 else if ((flags & O_CREAT) == O_CREAT)
107 else if ((flags & O_TRUNC) == O_TRUNC)
108 return FILE_OVERWRITE;
/*
 * Open a file using the SMB POSIX extensions (CIFSPOSIXCreate). On success,
 * fills *pnetfid/*poplock and, unless the caller passed pinode == NULL,
 * instantiates or refreshes the inode from the returned FILE_UNIX_BASIC_INFO.
 * NOTE(review): error-path lines (rc checks after kzalloc/tlink/create) are
 * not visible in this extract.
 */
113 int cifs_posix_open(char *full_path, struct inode **pinode,
114 struct super_block *sb, int mode, unsigned int f_flags,
115 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
118 FILE_UNIX_BASIC_INFO *presp_data;
119 __u32 posix_flags = 0;
120 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
121 struct cifs_fattr fattr;
122 struct tcon_link *tlink;
123 struct cifs_tcon *tcon;
125 cifs_dbg(FYI, "posix open %s\n", full_path);
127 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
128 if (presp_data == NULL)
131 tlink = cifs_sb_tlink(cifs_sb);
137 tcon = tlink_tcon(tlink);
/* apply the process umask before sending the create mode */
138 mode &= ~current_umask();
140 posix_flags = cifs_posix_convert_flags(f_flags);
141 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
142 poplock, full_path, cifs_sb->local_nls,
143 cifs_sb->mnt_cifs_flags &
144 CIFS_MOUNT_MAP_SPECIAL_CHR);
145 cifs_put_tlink(tlink);
/* Type == -1 means the server returned no file info */
150 if (presp_data->Type == cpu_to_le32(-1))
151 goto posix_open_ret; /* open ok, caller does qpathinfo */
154 goto posix_open_ret; /* caller does not need info */
156 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
158 /* get new inode and set it up */
159 if (*pinode == NULL) {
160 cifs_fill_uniqueid(sb, &fattr);
161 *pinode = cifs_iget(sb, &fattr);
167 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * Open a file the "NT" (non-POSIX) way via server->ops->open(), after
 * converting POSIX flags to desired access and a create disposition.
 * On success refreshes the inode via the unix or regular info path.
 * NOTE(review): the oparms.fid assignment and rc/buf error checks are
 * outside this extract.
 */
176 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
177 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
178 struct cifs_fid *fid, unsigned int xid)
183 int create_options = CREATE_NOT_DIR;
185 struct TCP_Server_Info *server = tcon->ses->server;
186 struct cifs_open_parms oparms;
/* dialect-specific open op is mandatory for this path */
188 if (!server->ops->open)
191 desired_access = cifs_convert_flags(f_flags);
193 /*********************************************************************
194 * open flag mapping table:
196 * POSIX Flag CIFS Disposition
197 * ---------- ----------------
198 * O_CREAT FILE_OPEN_IF
199 * O_CREAT | O_EXCL FILE_CREATE
200 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
201 * O_TRUNC FILE_OVERWRITE
202 * none of the above FILE_OPEN
204 * Note that there is not a direct match between disposition
205 * FILE_SUPERSEDE (ie create whether or not file exists although
206 * O_CREAT | O_TRUNC is similar but truncates the existing
207 * file rather than creating a new file as FILE_SUPERSEDE does
208 * (which uses the attributes / metadata passed in on open call)
210 *? O_SYNC is a reasonable match to CIFS writethrough flag
211 *? and the read write flags match reasonably. O_LARGEFILE
212 *? is irrelevant because largefile support is always used
213 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
215 *********************************************************************/
217 disposition = cifs_get_disposition(f_flags);
219 /* BB pass O_SYNC flag through on file attributes .. BB */
221 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
225 if (backup_cred(cifs_sb))
226 create_options |= CREATE_OPEN_BACKUP_INTENT;
229 oparms.cifs_sb = cifs_sb;
230 oparms.desired_access = desired_access;
231 oparms.create_options = create_options;
232 oparms.disposition = disposition;
233 oparms.path = full_path;
235 oparms.reconnect = false;
237 rc = server->ops->open(xid, &oparms, oplock, buf);
/* refresh cached inode metadata from the open response */
243 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
246 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * Return true if any open fid on this inode holds cached byte-range
 * (mandatory-style) locks. Walks cinode->llist under lock_sem (read).
 */
255 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
257 struct cifs_fid_locks *cur;
258 bool has_locks = false;
260 down_read(&cinode->lock_sem);
261 list_for_each_entry(cur, &cinode->llist, llist) {
262 if (!list_empty(&cur->locks)) {
267 up_read(&cinode->lock_sem);
/*
 * Allocate and initialize a cifsFileInfo for a freshly opened file, link
 * its per-fid lock list into the inode, register it on the tcon/inode open
 * file lists, and resolve any pending-open/lease-break races against the
 * oplock value. Readable instances go to the front of the inode list.
 * NOTE(review): allocation-failure cleanup lines are outside this extract.
 */
271 struct cifsFileInfo *
272 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
273 struct tcon_link *tlink, __u32 oplock)
275 struct dentry *dentry = file->f_path.dentry;
276 struct inode *inode = dentry->d_inode;
277 struct cifsInodeInfo *cinode = CIFS_I(inode);
278 struct cifsFileInfo *cfile;
279 struct cifs_fid_locks *fdlocks;
280 struct cifs_tcon *tcon = tlink_tcon(tlink);
281 struct TCP_Server_Info *server = tcon->ses->server;
283 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
287 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
293 INIT_LIST_HEAD(&fdlocks->locks);
294 fdlocks->cfile = cfile;
295 cfile->llist = fdlocks;
296 down_write(&cinode->lock_sem);
297 list_add(&fdlocks->llist, &cinode->llist);
298 up_write(&cinode->lock_sem);
301 cfile->pid = current->tgid;
302 cfile->uid = current_fsuid();
303 cfile->dentry = dget(dentry);
304 cfile->f_flags = file->f_flags;
305 cfile->invalidHandle = false;
306 cfile->tlink = cifs_get_tlink(tlink);
307 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
308 mutex_init(&cfile->fh_mutex);
/* pin the superblock for the lifetime of this open file */
310 cifs_sb_active(inode->i_sb);
313 * If the server returned a read oplock and we have mandatory brlocks,
314 * set oplock level to None.
316 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
317 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
/* honor an oplock level delivered while the open was pending */
321 spin_lock(&cifs_file_list_lock);
322 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
323 oplock = fid->pending_open->oplock;
324 list_del(&fid->pending_open->olist);
326 fid->purge_cache = false;
327 server->ops->set_fid(cfile, fid, oplock);
329 list_add(&cfile->tlist, &tcon->openFileList);
330 /* if readable file instance put first in list*/
331 if (file->f_mode & FMODE_READ)
332 list_add(&cfile->flist, &cinode->openFileList);
334 list_add_tail(&cfile->flist, &cinode->openFileList);
335 spin_unlock(&cifs_file_list_lock);
337 if (fid->purge_cache)
338 cifs_zap_mapping(inode);
340 file->private_data = cfile;
/*
 * Take a reference on a cifsFileInfo under cifs_file_list_lock.
 * Counterpart of cifsFileInfo_put().
 */
344 struct cifsFileInfo *
345 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
347 spin_lock(&cifs_file_list_lock);
348 cifsFileInfo_get_locked(cifs_file);
349 spin_unlock(&cifs_file_list_lock);
354 * Release a reference on the file private data. This may involve closing
355 * the filehandle out on the server. Must be called without holding
356 * cifs_file_list_lock.
358 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
360 struct inode *inode = cifs_file->dentry->d_inode;
361 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
362 struct TCP_Server_Info *server = tcon->ses->server;
363 struct cifsInodeInfo *cifsi = CIFS_I(inode);
364 struct super_block *sb = inode->i_sb;
365 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
366 struct cifsLockInfo *li, *tmp;
368 struct cifs_pending_open open;
369 bool oplock_break_cancelled;
/* drop a reference; only the last put tears everything down */
371 spin_lock(&cifs_file_list_lock);
372 if (--cifs_file->count > 0) {
373 spin_unlock(&cifs_file_list_lock);
377 if (server->ops->get_lease_key)
378 server->ops->get_lease_key(inode, &fid);
380 /* store open in pending opens to make sure we don't miss lease break */
381 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
383 /* remove it from the lists */
384 list_del(&cifs_file->flist);
385 list_del(&cifs_file->tlist);
387 if (list_empty(&cifsi->openFileList)) {
388 cifs_dbg(FYI, "closing last open instance for inode %p\n",
389 cifs_file->dentry->d_inode);
391 * In strict cache mode we need invalidate mapping on the last
392 * close because it may cause a error when we open this file
393 * again and get at least level II oplock.
395 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
396 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
397 cifs_set_oplock_level(cifsi, 0);
399 spin_unlock(&cifs_file_list_lock);
/* stop any in-flight oplock break work before closing the handle */
401 oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
403 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
404 struct TCP_Server_Info *server = tcon->ses->server;
408 if (server->ops->close)
409 server->ops->close(xid, tcon, &cifs_file->fid);
413 if (oplock_break_cancelled)
414 cifs_done_oplock_break(cifsi);
416 cifs_del_pending_open(&open);
419 * Delete any outstanding lock records. We'll lose them when the file
422 down_write(&cifsi->lock_sem);
423 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
424 list_del(&li->llist);
425 cifs_del_lock_waiters(li);
428 list_del(&cifs_file->llist->llist);
429 kfree(cifs_file->llist);
430 up_write(&cifsi->lock_sem);
432 cifs_put_tlink(cifs_file->tlink);
433 dput(cifs_file->dentry);
434 cifs_sb_deactive(sb);
/*
 * VFS ->open entry point. Tries the SMB POSIX-extensions open first when
 * the server advertises it; falls back to cifs_nt_open(). Registers a
 * pending open to avoid missing lease breaks, builds the cifsFileInfo,
 * and (for unix extensions) fixes up the mode after create.
 * NOTE(review): several error-path lines (rc checks, out labels, free of
 * full_path) are outside this extract.
 */
438 int cifs_open(struct inode *inode, struct file *file)
444 struct cifs_sb_info *cifs_sb;
445 struct TCP_Server_Info *server;
446 struct cifs_tcon *tcon;
447 struct tcon_link *tlink;
448 struct cifsFileInfo *cfile = NULL;
449 char *full_path = NULL;
450 bool posix_open_ok = false;
452 struct cifs_pending_open open;
456 cifs_sb = CIFS_SB(inode->i_sb);
457 tlink = cifs_sb_tlink(cifs_sb);
460 return PTR_ERR(tlink);
462 tcon = tlink_tcon(tlink);
463 server = tcon->ses->server;
465 full_path = build_path_from_dentry(file->f_path.dentry);
466 if (full_path == NULL) {
471 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
472 inode, file->f_flags, full_path);
/* O_DIRECT under strict IO: switch to the uncached file ops */
474 if (file->f_flags & O_DIRECT &&
475 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
476 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
477 file->f_op = &cifs_file_direct_nobrl_ops;
479 file->f_op = &cifs_file_direct_ops;
487 if (!tcon->broken_posix_open && tcon->unix_ext &&
488 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
489 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
490 /* can not refresh inode info since size could be stale */
491 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
492 cifs_sb->mnt_file_mode /* ignored */,
493 file->f_flags, &oplock, &fid.netfid, xid);
495 cifs_dbg(FYI, "posix open succeeded\n");
496 posix_open_ok = true;
497 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
498 if (tcon->ses->serverNOS)
499 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
500 tcon->ses->serverName,
501 tcon->ses->serverNOS);
502 tcon->broken_posix_open = true;
503 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
504 (rc != -EOPNOTSUPP)) /* path not found or net err */
507 * Else fallthrough to retry open the old way on network i/o
/* track this open so a concurrent lease break is not lost */
512 if (server->ops->get_lease_key)
513 server->ops->get_lease_key(inode, &fid);
515 cifs_add_pending_open(&fid, tlink, &open);
517 if (!posix_open_ok) {
518 if (server->ops->get_lease_key)
519 server->ops->get_lease_key(inode, &fid);
521 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
522 file->f_flags, &oplock, &fid, xid);
524 cifs_del_pending_open(&open);
529 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
531 if (server->ops->close)
532 server->ops->close(xid, tcon, &fid);
533 cifs_del_pending_open(&open);
538 cifs_fscache_set_inode_cookie(inode, file);
540 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
542 * Time to set mode which we can not set earlier due to
543 * problems creating new read-only files.
545 struct cifs_unix_set_info_args args = {
546 .mode = inode->i_mode,
547 .uid = INVALID_UID, /* no change */
548 .gid = INVALID_GID, /* no change */
549 .ctime = NO_CHANGE_64,
550 .atime = NO_CHANGE_64,
551 .mtime = NO_CHANGE_64,
554 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
561 cifs_put_tlink(tlink);
565 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
568 * Try to reacquire byte range locks that were released when session
569 * to server was lost.
572 cifs_relock_file(struct cifsFileInfo *cfile)
574 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
575 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
576 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
579 down_read(&cinode->lock_sem);
580 if (cinode->can_cache_brlcks) {
581 /* can cache locks - no need to relock */
582 up_read(&cinode->lock_sem);
/* pick posix vs mandatory push based on unix caps and mount flags */
586 if (cap_unix(tcon->ses) &&
587 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
588 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
589 rc = cifs_push_posix_locks(cfile);
591 rc = tcon->ses->server->ops->push_mand_locks(cfile);
593 up_read(&cinode->lock_sem);
/*
 * Reopen a file whose handle was invalidated (e.g. after reconnect).
 * Serialized on cfile->fh_mutex. Tries a POSIX reopen first where
 * supported, otherwise an NT open with disposition FILE_OPEN. When
 * can_flush, dirty pages are written back and inode info refreshed;
 * finally the fid is re-registered and cached locks are re-pushed.
 * NOTE(review): some rc checks and the reopen_success/exit labels are
 * outside this extract.
 */
598 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
603 struct cifs_sb_info *cifs_sb;
604 struct cifs_tcon *tcon;
605 struct TCP_Server_Info *server;
606 struct cifsInodeInfo *cinode;
608 char *full_path = NULL;
610 int disposition = FILE_OPEN;
611 int create_options = CREATE_NOT_DIR;
612 struct cifs_open_parms oparms;
615 mutex_lock(&cfile->fh_mutex);
616 if (!cfile->invalidHandle) {
617 mutex_unlock(&cfile->fh_mutex);
623 inode = cfile->dentry->d_inode;
624 cifs_sb = CIFS_SB(inode->i_sb);
625 tcon = tlink_tcon(cfile->tlink);
626 server = tcon->ses->server;
629 * Can not grab rename sem here because various ops, including those
630 * that already have the rename sem can end up causing writepage to get
631 * called and if the server was down that means we end up here, and we
632 * can never tell if the caller already has the rename_sem.
634 full_path = build_path_from_dentry(cfile->dentry);
635 if (full_path == NULL) {
637 mutex_unlock(&cfile->fh_mutex);
642 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
643 inode, cfile->f_flags, full_path);
645 if (tcon->ses->server->oplocks)
650 if (tcon->unix_ext && cap_unix(tcon->ses) &&
651 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
652 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
654 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
655 * original open. Must mask them off for a reopen.
657 unsigned int oflags = cfile->f_flags &
658 ~(O_CREAT | O_EXCL | O_TRUNC);
660 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
661 cifs_sb->mnt_file_mode /* ignored */,
662 oflags, &oplock, &cfile->fid.netfid, xid);
664 cifs_dbg(FYI, "posix reopen succeeded\n");
665 oparms.reconnect = true;
669 * fallthrough to retry open the old way on errors, especially
670 * in the reconnect path it is important to retry hard
674 desired_access = cifs_convert_flags(cfile->f_flags);
676 if (backup_cred(cifs_sb))
677 create_options |= CREATE_OPEN_BACKUP_INTENT;
679 if (server->ops->get_lease_key)
680 server->ops->get_lease_key(inode, &cfile->fid);
683 oparms.cifs_sb = cifs_sb;
684 oparms.desired_access = desired_access;
685 oparms.create_options = create_options;
686 oparms.disposition = disposition;
687 oparms.path = full_path;
688 oparms.fid = &cfile->fid;
689 oparms.reconnect = true;
692 * Can not refresh inode by passing in file_info buf to be returned by
693 * ops->open and then calling get_inode_info with returned buf since
694 * file might have write behind data that needs to be flushed and server
695 * version of file size can be stale. If we knew for sure that inode was
696 * not dirty locally we could do this.
698 rc = server->ops->open(xid, &oparms, &oplock, NULL);
699 if (rc == -ENOENT && oparms.reconnect == false) {
700 /* durable handle timeout is expired - open the file again */
701 rc = server->ops->open(xid, &oparms, &oplock, NULL);
702 /* indicate that we need to relock the file */
703 oparms.reconnect = true;
707 mutex_unlock(&cfile->fh_mutex);
708 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
709 cifs_dbg(FYI, "oplock: %d\n", oplock);
710 goto reopen_error_exit;
714 cfile->invalidHandle = false;
715 mutex_unlock(&cfile->fh_mutex);
716 cinode = CIFS_I(inode);
/* can_flush path: write back dirty data, then refresh inode info */
719 rc = filemap_write_and_wait(inode->i_mapping);
720 mapping_set_error(inode->i_mapping, rc);
723 rc = cifs_get_inode_info_unix(&inode, full_path,
726 rc = cifs_get_inode_info(&inode, full_path, NULL,
727 inode->i_sb, xid, NULL);
730 * Else we are writing out data to server already and could deadlock if
731 * we tried to flush data, and since we do not know if we have data that
732 * would invalidate the current end of file on the server we can not go
733 * to the server to get the new inode info.
736 server->ops->set_fid(cfile, &cfile->fid, oplock);
737 if (oparms.reconnect)
738 cifs_relock_file(cfile);
/*
 * VFS ->release for regular files: drop our reference on the
 * cifsFileInfo (which may close the server handle on last put).
 */
746 int cifs_close(struct inode *inode, struct file *file)
748 if (file->private_data != NULL) {
749 cifsFileInfo_put(file->private_data);
750 file->private_data = NULL;
753 /* return code from the ->release op is always ignored */
/*
 * VFS ->release for directories: close the search handle on the server
 * if still needed, free any buffered readdir response, and free the
 * private cifsFileInfo. rc from close_dir is logged and ignored.
 */
757 int cifs_closedir(struct inode *inode, struct file *file)
761 struct cifsFileInfo *cfile = file->private_data;
762 struct cifs_tcon *tcon;
763 struct TCP_Server_Info *server;
766 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
772 tcon = tlink_tcon(cfile->tlink);
773 server = tcon->ses->server;
775 cifs_dbg(FYI, "Freeing private data in close dir\n");
776 spin_lock(&cifs_file_list_lock);
777 if (server->ops->dir_needs_close(cfile)) {
778 cfile->invalidHandle = true;
779 spin_unlock(&cifs_file_list_lock);
780 if (server->ops->close_dir)
781 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
784 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
785 /* not much we can do if it fails anyway, ignore rc */
788 spin_unlock(&cifs_file_list_lock);
/* release the cached network search response buffer, if any */
790 buf = cfile->srch_inf.ntwrk_buf_start;
792 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
793 cfile->srch_inf.ntwrk_buf_start = NULL;
794 if (cfile->srch_inf.smallBuf)
795 cifs_small_buf_release(buf);
797 cifs_buf_release(buf);
800 cifs_put_tlink(cfile->tlink);
801 kfree(file->private_data);
802 file->private_data = NULL;
803 /* BB can we lock the filestruct while this is going on? */
/*
 * Allocate and initialize a cifsLockInfo for the given byte range,
 * tagged with the current thread group's pid.
 * NOTE(review): the NULL check after kmalloc and lock->type assignment
 * are outside this extract.
 */
808 static struct cifsLockInfo *
809 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
811 struct cifsLockInfo *lock =
812 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
815 lock->offset = offset;
816 lock->length = length;
818 lock->pid = current->tgid;
819 INIT_LIST_HEAD(&lock->blist);
820 init_waitqueue_head(&lock->block_q);
/*
 * Wake up every lock request blocked on this lock and unlink it from
 * the blocked list so waiters can retry.
 */
825 cifs_del_lock_waiters(struct cifsLockInfo *lock)
827 struct cifsLockInfo *li, *tmp;
828 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
829 list_del_init(&li->blist);
830 wake_up(&li->block_q);
834 #define CIFS_LOCK_OP 0
835 #define CIFS_READ_OP 1
836 #define CIFS_WRITE_OP 2
838 /* @rw_check : 0 - no op, 1 - read, 2 - write */
/*
 * Scan one fid's cached lock list for a lock overlapping [offset,
 * offset+length). rw_check selects the semantics (CIFS_LOCK_OP /
 * CIFS_READ_OP / CIFS_WRITE_OP): same-owner locks on the same fid only
 * conflict with writes through a shared lock; otherwise shared locks
 * from the same owner/fid or of identical type do not conflict.
 * NOTE(review): the conflict-found/return lines are outside this extract.
 */
840 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
841 __u64 length, __u8 type, struct cifsFileInfo *cfile,
842 struct cifsLockInfo **conf_lock, int rw_check)
844 struct cifsLockInfo *li;
845 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
846 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
848 list_for_each_entry(li, &fdlocks->locks, llist) {
/* skip ranges that do not overlap the requested one */
849 if (offset + length <= li->offset ||
850 offset >= li->offset + li->length)
852 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
853 server->ops->compare_fids(cfile, cur_cfile)) {
854 /* shared lock prevents write op through the same fid */
855 if (!(li->type & server->vals->shared_lock_type) ||
856 rw_check != CIFS_WRITE_OP)
859 if ((type & server->vals->shared_lock_type) &&
860 ((server->ops->compare_fids(cfile, cur_cfile) &&
861 current->tgid == li->pid) || type == li->type))
/*
 * Check every fid's lock list on this inode for a conflict with the
 * given range, delegating to cifs_find_fid_lock_conflict(). Caller must
 * hold cinode->lock_sem.
 */
871 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
872 __u8 type, struct cifsLockInfo **conf_lock,
876 struct cifs_fid_locks *cur;
877 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
879 list_for_each_entry(cur, &cinode->llist, llist) {
880 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
881 cfile, conf_lock, rw_check);
890 * Check if there is another lock that prevents us to set the lock (mandatory
891 * style). If such a lock exists, update the flock structure with its
892 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
893 * or leave it the same if we can't. Returns 0 if we don't need to request to
894 * the server or 1 otherwise.
897 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
898 __u8 type, struct file_lock *flock)
901 struct cifsLockInfo *conf_lock;
902 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
903 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
906 down_read(&cinode->lock_sem);
908 exist = cifs_find_lock_conflict(cfile, offset, length, type,
909 &conf_lock, CIFS_LOCK_OP);
/* a conflicting cached lock: report its range/owner/type via flock */
911 flock->fl_start = conf_lock->offset;
912 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
913 flock->fl_pid = conf_lock->pid;
914 if (conf_lock->type & server->vals->shared_lock_type)
915 flock->fl_type = F_RDLCK;
917 flock->fl_type = F_WRLCK;
918 } else if (!cinode->can_cache_brlcks)
921 flock->fl_type = F_UNLCK;
923 up_read(&cinode->lock_sem);
/*
 * Append a lock to this fid's cached lock list under lock_sem (write).
 */
928 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
930 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
931 down_write(&cinode->lock_sem);
932 list_add_tail(&lock->llist, &cfile->llist->locks);
933 up_write(&cinode->lock_sem);
937 * Set the byte-range lock (mandatory style). Returns:
938 * 1) 0, if we set the lock and don't need to request to the server;
939 * 2) 1, if no locks prevent us but we need to request to the server;
940 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
943 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
946 struct cifsLockInfo *conf_lock;
947 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
953 down_write(&cinode->lock_sem);
955 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
956 lock->type, &conf_lock, CIFS_LOCK_OP);
957 if (!exist && cinode->can_cache_brlcks) {
958 list_add_tail(&lock->llist, &cfile->llist->locks);
959 up_write(&cinode->lock_sem);
/* conflict and wait allowed: block on the conflicting lock's list
   until our blist entry is removed (i.e. the list is empty again) */
968 list_add_tail(&lock->blist, &conf_lock->blist);
969 up_write(&cinode->lock_sem);
970 rc = wait_event_interruptible(lock->block_q,
971 (lock->blist.prev == &lock->blist) &&
972 (lock->blist.next == &lock->blist));
975 down_write(&cinode->lock_sem);
976 list_del_init(&lock->blist);
979 up_write(&cinode->lock_sem);
984 * Check if there is another lock that prevents us to set the lock (posix
985 * style). If such a lock exists, update the flock structure with its
986 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
987 * or leave it the same if we can't. Returns 0 if we don't need to request to
988 * the server or 1 otherwise.
991 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
994 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
995 unsigned char saved_type = flock->fl_type;
997 if ((flock->fl_flags & FL_POSIX) == 0)
1000 down_read(&cinode->lock_sem);
1001 posix_test_lock(file, flock);
/* no local conflict but brlocks aren't cached: ask the server */
1003 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1004 flock->fl_type = saved_type;
1008 up_read(&cinode->lock_sem);
1013 * Set the byte-range lock (posix style). Returns:
1014 * 1) 0, if we set the lock and don't need to request to the server;
1015 * 2) 1, if we need to request to the server;
1016 * 3) <0, if the error occurs while setting the lock.
1019 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1021 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1024 if ((flock->fl_flags & FL_POSIX) == 0)
1028 down_write(&cinode->lock_sem);
1029 if (!cinode->can_cache_brlcks) {
1030 up_write(&cinode->lock_sem);
1034 rc = posix_lock_file(file, flock, NULL);
1035 up_write(&cinode->lock_sem);
/* blocking lock deferred by the VFS: wait until it is granted */
1036 if (rc == FILE_LOCK_DEFERRED) {
1037 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1040 posix_unblock_lock(flock);
/*
 * Push all cached mandatory byte-range locks for this fid to the server
 * in batched LOCKING_ANDX requests, one pass per lock type (exclusive,
 * then shared). Batch size is bounded by the negotiated maxBuf.
 * NOTE(review): the maxBuf zero-check, per-batch error handling and the
 * kfree/return lines are outside this extract.
 */
1046 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1049 int rc = 0, stored_rc;
1050 struct cifsLockInfo *li, *tmp;
1051 struct cifs_tcon *tcon;
1052 unsigned int num, max_num, max_buf;
1053 LOCKING_ANDX_RANGE *buf, *cur;
1054 int types[] = {LOCKING_ANDX_LARGE_FILES,
1055 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1059 tcon = tlink_tcon(cfile->tlink);
1062 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1063 * and check it for zero before using.
1065 max_buf = tcon->ses->server->maxBuf;
1071 max_num = (max_buf - sizeof(struct smb_hdr)) /
1072 sizeof(LOCKING_ANDX_RANGE);
1073 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
/* two passes: exclusive locks first, then shared */
1079 for (i = 0; i < 2; i++) {
1082 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1083 if (li->type != types[i])
1085 cur->Pid = cpu_to_le16(li->pid);
1086 cur->LengthLow = cpu_to_le32((u32)li->length);
1087 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1088 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1089 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1090 if (++num == max_num) {
1091 stored_rc = cifs_lockv(xid, tcon,
1093 (__u8)li->type, 0, num,
/* flush the final partial batch for this type */
1104 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1105 (__u8)types[i], 0, num, buf);
1116 /* copied from fs/locks.c with a name change */
1117 #define cifs_for_each_lock(inode, lockp) \
1118 for (lockp = &inode->i_flock; *lockp != NULL; \
1119 lockp = &(*lockp)->fl_next)
1121 struct lock_to_push {
1122 struct list_head llist;
/*
 * Push all FL_POSIX locks held on this inode to the server. First counts
 * them under i_lock, pre-allocates lock_to_push entries (no allocation
 * is possible while holding i_lock), then walks the list again copying
 * lock details, and finally sends each with CIFSSMBPosixLock.
 * NOTE(review): the count++ line, lck->type assignment and error/free
 * paths are outside this extract.
 */
1131 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1133 struct inode *inode = cfile->dentry->d_inode;
1134 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1135 struct file_lock *flock, **before;
1136 unsigned int count = 0, i = 0;
1137 int rc = 0, xid, type;
1138 struct list_head locks_to_send, *el;
1139 struct lock_to_push *lck, *tmp;
/* pass 1: count FL_POSIX locks under i_lock */
1144 spin_lock(&inode->i_lock);
1145 cifs_for_each_lock(inode, before) {
1146 if ((*before)->fl_flags & FL_POSIX)
1149 spin_unlock(&inode->i_lock);
1151 INIT_LIST_HEAD(&locks_to_send);
1154 * Allocating count locks is enough because no FL_POSIX locks can be
1155 * added to the list while we are holding cinode->lock_sem that
1156 * protects locking operations of this inode.
1158 for (; i < count; i++) {
1159 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1164 list_add_tail(&lck->llist, &locks_to_send);
/* pass 2: copy lock details into the pre-allocated entries */
1167 el = locks_to_send.next;
1168 spin_lock(&inode->i_lock);
1169 cifs_for_each_lock(inode, before) {
1171 if ((flock->fl_flags & FL_POSIX) == 0)
1173 if (el == &locks_to_send) {
1175 * The list ended. We don't have enough allocated
1176 * structures - something is really wrong.
1178 cifs_dbg(VFS, "Can't push all brlocks!\n");
1181 length = 1 + flock->fl_end - flock->fl_start;
1182 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1186 lck = list_entry(el, struct lock_to_push, llist);
1187 lck->pid = flock->fl_pid;
1188 lck->netfid = cfile->fid.netfid;
1189 lck->length = length;
1191 lck->offset = flock->fl_start;
1194 spin_unlock(&inode->i_lock);
/* pass 3: send each collected lock to the server */
1196 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1199 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1200 lck->offset, lck->length, NULL,
1204 list_del(&lck->llist);
/* cleanup: free any remaining entries */
1212 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1213 list_del(&lck->llist);
/*
 * Push cached byte-range locks (posix or mandatory, chosen by unix caps
 * and mount flags) to the server, then clear can_cache_brlcks so future
 * locks go straight to the server.
 */
1220 cifs_push_locks(struct cifsFileInfo *cfile)
1222 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1223 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1224 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1227 /* we are going to update can_cache_brlcks here - need a write access */
1228 down_write(&cinode->lock_sem);
1229 if (!cinode->can_cache_brlcks) {
1230 up_write(&cinode->lock_sem);
1234 if (cap_unix(tcon->ses) &&
1235 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1236 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1237 rc = cifs_push_posix_locks(cfile);
1239 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1241 cinode->can_cache_brlcks = false;
1242 up_write(&cinode->lock_sem);
/*
 * Decode a struct file_lock into CIFS lock parameters: logs the flag
 * class (POSIX/FLOCK/etc), sets *wait_flag for blocking requests, and
 * maps fl_type to the server's lock-type bits via *type. *lock/*unlock
 * are set in branches not visible in this extract.
 */
1247 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1248 bool *wait_flag, struct TCP_Server_Info *server)
1250 if (flock->fl_flags & FL_POSIX)
1251 cifs_dbg(FYI, "Posix\n");
1252 if (flock->fl_flags & FL_FLOCK)
1253 cifs_dbg(FYI, "Flock\n");
1254 if (flock->fl_flags & FL_SLEEP) {
1255 cifs_dbg(FYI, "Blocking lock\n");
1258 if (flock->fl_flags & FL_ACCESS)
1259 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1260 if (flock->fl_flags & FL_LEASE)
1261 cifs_dbg(FYI, "Lease on file - not implemented yet\n")
1262 if (flock->fl_flags &
1263 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1264 FL_ACCESS | FL_LEASE | FL_CLOSE)))
1265 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1267 *type = server->vals->large_lock_type;
1268 if (flock->fl_type == F_WRLCK) {
1269 cifs_dbg(FYI, "F_WRLCK\n");
1270 *type |= server->vals->exclusive_lock_type;
1272 } else if (flock->fl_type == F_UNLCK) {
1273 cifs_dbg(FYI, "F_UNLCK\n");
1274 *type |= server->vals->unlock_lock_type;
1276 /* Check if unlock includes more than one lock range */
1277 } else if (flock->fl_type == F_RDLCK) {
1278 cifs_dbg(FYI, "F_RDLCK\n");
1279 *type |= server->vals->shared_lock_type;
1281 } else if (flock->fl_type == F_EXLCK) {
1282 cifs_dbg(FYI, "F_EXLCK\n");
1283 *type |= server->vals->exclusive_lock_type;
1285 } else if (flock->fl_type == F_SHLCK) {
1286 cifs_dbg(FYI, "F_SHLCK\n");
1287 *type |= server->vals->shared_lock_type;
1290 cifs_dbg(FYI, "Unknown type of lock\n");
/*
 * F_GETLK handler. For posix-capable mounts, tests locally then probes
 * the server with CIFSSMBPosixLock. For mandatory locks, tests the
 * local cache then probes the server by attempting a lock and, on
 * success, immediately unlocking it (reporting F_UNLCK); an exclusive
 * probe that fails is retried as shared to classify the conflict type.
 * NOTE(review): several rc checks and return lines are outside this
 * extract.
 */
1294 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1295 bool wait_flag, bool posix_lck, unsigned int xid)
1298 __u64 length = 1 + flock->fl_end - flock->fl_start;
1299 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1300 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1301 struct TCP_Server_Info *server = tcon->ses->server;
1302 __u16 netfid = cfile->fid.netfid;
1305 int posix_lock_type;
1307 rc = cifs_posix_lock_test(file, flock);
1311 if (type & server->vals->shared_lock_type)
1312 posix_lock_type = CIFS_RDLCK;
1314 posix_lock_type = CIFS_WRLCK;
1315 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1316 flock->fl_start, length, flock,
1317 posix_lock_type, wait_flag);
1321 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1325 /* BB we could chain these into one lock request BB */
1326 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
/* probe succeeded: undo it and report the range as unlocked */
1329 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1331 flock->fl_type = F_UNLCK;
1333 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
/* exclusive probe failed: retry as shared to identify lock type */
1338 if (type & server->vals->shared_lock_type) {
1339 flock->fl_type = F_WRLCK;
1343 type &= ~server->vals->exclusive_lock_type;
1345 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1346 type | server->vals->shared_lock_type,
1349 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1350 type | server->vals->shared_lock_type, 0, 1, false);
1351 flock->fl_type = F_RDLCK;
1353 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1356 flock->fl_type = F_WRLCK;
/* Move every entry from @source onto @dest (safe against removal). */
1362 cifs_move_llist(struct list_head *source, struct list_head *dest)
1364 struct list_head *li, *tmp;
1365 list_for_each_safe(li, tmp, source)
1366 list_move(li, dest);
/*
 * Free all cifsLockInfo entries on @llist, waking any waiters on each
 * lock first. NOTE(review): the kfree of each entry is on a line elided
 * from this view.
 */
1370 cifs_free_llist(struct list_head *llist)
1372 struct cifsLockInfo *li, *tmp;
1373 list_for_each_entry_safe(li, tmp, llist, llist) {
1374 cifs_del_lock_waiters(li);
1375 list_del(&li->llist);
/*
 * Unlock all cached byte-range locks of @cfile that fall inside the range
 * described by @flock, batching them into LOCKING_ANDX_RANGE arrays and
 * sending cifs_lockv() requests (one pass per lock type). Locks being
 * unlocked are parked on tmp_llist so they can be restored to the file's
 * list if the wire request fails. Runs under cinode->lock_sem (write).
 * NOTE(review): lines are elided in this view.
 */
1381 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1384 int rc = 0, stored_rc;
/* One pass for exclusive large-file locks, one for shared ones. */
1385 int types[] = {LOCKING_ANDX_LARGE_FILES,
1386 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1388 unsigned int max_num, num, max_buf;
1389 LOCKING_ANDX_RANGE *buf, *cur;
1390 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1391 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1392 struct cifsLockInfo *li, *tmp;
1393 __u64 length = 1 + flock->fl_end - flock->fl_start;
1394 struct list_head tmp_llist;
1396 INIT_LIST_HEAD(&tmp_llist);
1399 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1400 * and check it for zero before using.
1402 max_buf = tcon->ses->server->maxBuf;
/* How many lock ranges fit in one SMB buffer after the header. */
1406 max_num = (max_buf - sizeof(struct smb_hdr)) /
1407 sizeof(LOCKING_ANDX_RANGE);
1408 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1412 down_write(&cinode->lock_sem);
1413 for (i = 0; i < 2; i++) {
1416 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
/* Skip locks outside the requested unlock range... */
1417 if (flock->fl_start > li->offset ||
1418 (flock->fl_start + length) <
1419 (li->offset + li->length))
/* ...locks owned by other threads, and the wrong type pass. */
1421 if (current->tgid != li->pid)
1423 if (types[i] != li->type)
1425 if (cinode->can_cache_brlcks) {
1427 * We can cache brlock requests - simply remove
1428 * a lock from the file's list.
1430 list_del(&li->llist);
1431 cifs_del_lock_waiters(li);
/* Fill the next LOCKING_ANDX_RANGE slot from this lock. */
1435 cur->Pid = cpu_to_le16(li->pid);
1436 cur->LengthLow = cpu_to_le32((u32)li->length);
1437 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1438 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1439 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1441 * We need to save a lock here to let us add it again to
1442 * the file's list if the unlock range request fails on
1445 list_move(&li->llist, &tmp_llist);
/* Buffer full - flush this batch to the server now. */
1446 if (++num == max_num) {
1447 stored_rc = cifs_lockv(xid, tcon,
1449 li->type, num, 0, buf);
1452 * We failed on the unlock range
1453 * request - add all locks from the tmp
1454 * list to the head of the file's list.
1456 cifs_move_llist(&tmp_llist,
1457 &cfile->llist->locks);
1461 * The unlock range request succeed -
1462 * free the tmp list.
1464 cifs_free_llist(&tmp_llist);
/* Flush any partial batch left at the end of the pass. */
1471 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1472 types[i], num, 0, buf);
1474 cifs_move_llist(&tmp_llist,
1475 &cfile->llist->locks);
1478 cifs_free_llist(&tmp_llist);
1482 up_write(&cinode->lock_sem);
/*
 * Apply (F_SETLK/F_SETLKW path) a lock or unlock request. POSIX-capable
 * mounts go through CIFSSMBPosixLock(); mandatory-lock mounts add the
 * lock locally via cifs_lock_add_if()/cifs_lock_add() and send it with
 * server->ops->mand_lock(), or undo a range with mand_unlock_range().
 * NOTE(review): lines are elided in this view.
 */
1488 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1489 bool wait_flag, bool posix_lck, int lock, int unlock,
1493 __u64 length = 1 + flock->fl_end - flock->fl_start;
1494 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1495 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1496 struct TCP_Server_Info *server = tcon->ses->server;
1497 struct inode *inode = cfile->dentry->d_inode;
1500 int posix_lock_type;
/* POSIX path: record the lock locally, then send it to the server. */
1502 rc = cifs_posix_lock_set(file, flock);
1506 if (type & server->vals->shared_lock_type)
1507 posix_lock_type = CIFS_RDLCK;
1509 posix_lock_type = CIFS_WRLCK;
1512 posix_lock_type = CIFS_UNLCK;
1514 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1515 current->tgid, flock->fl_start, length,
1516 NULL, posix_lock_type, wait_flag);
1521 struct cifsLockInfo *lock;
1523 lock = cifs_lock_init(flock->fl_start, length, type);
1527 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1536 * Windows 7 server can delay breaking lease from read to None
1537 * if we set a byte-range lock on a file - break it explicitly
1538 * before sending the lock to the server to be sure the next
1539 * read won't conflict with non-overlapted locks due to
/* Drop read-cached pages so future reads hit the server. */
1542 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1543 CIFS_CACHE_READ(CIFS_I(inode))) {
1544 cifs_zap_mapping(inode);
1545 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1547 CIFS_I(inode)->oplock = 0;
1550 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1551 type, 1, 0, wait_flag);
1557 cifs_lock_add(cfile, lock);
1559 rc = server->ops->mand_unlock_range(cfile, flock, xid);
/* Keep the local VFS posix lock state in sync with what we did. */
1562 if (flock->fl_flags & FL_POSIX)
1563 posix_lock_file_wait(file, flock);
/*
 * VFS ->lock entry point. Decodes the request with cifs_read_flock(),
 * decides whether the mount supports POSIX (unix-extension) byte-range
 * locks, then dispatches to cifs_getlk() for F_GETLK or cifs_setlk()
 * for set/unset. NOTE(review): lines are elided in this view.
 */
1567 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1570 int lock = 0, unlock = 0;
1571 bool wait_flag = false;
1572 bool posix_lck = false;
1573 struct cifs_sb_info *cifs_sb;
1574 struct cifs_tcon *tcon;
1575 struct cifsInodeInfo *cinode;
1576 struct cifsFileInfo *cfile;
1583 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1584 cmd, flock->fl_flags, flock->fl_type,
1585 flock->fl_start, flock->fl_end);
1587 cfile = (struct cifsFileInfo *)file->private_data;
1588 tcon = tlink_tcon(cfile->tlink);
1590 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1593 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1594 netfid = cfile->fid.netfid;
1595 cinode = CIFS_I(file_inode(file));
/* POSIX brlocks need unix caps, FCNTL cap, and no-posix-brl unset. */
1597 if (cap_unix(tcon->ses) &&
1598 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1599 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1602 * BB add code here to normalize offset and length to account for
1603 * negative length which we can not accept over the wire.
1605 if (IS_GETLK(cmd)) {
1606 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1611 if (!lock && !unlock) {
1613 * if no lock or unlock then nothing to do since we do not
1620 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1627 * update the file size (if needed) after a write. Should be called with
1628 * the inode->i_lock held
/* Advance the cached server-side EOF, never shrink it. */
1631 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1632 unsigned int bytes_written)
1634 loff_t end_of_write = offset + bytes_written;
1636 if (end_of_write > cifsi->server_eof)
1637 cifsi->server_eof = end_of_write;
/*
 * Synchronously write @write_size bytes from @write_data at *@offset via
 * server->ops->sync_write, looping until all bytes are written, retrying
 * on -EAGAIN (reopening an invalidated handle first). Updates the cached
 * server EOF and i_size, advances *@offset, and returns the total number
 * of bytes written. NOTE(review): lines are elided in this view.
 */
1641 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1642 size_t write_size, loff_t *offset)
1645 unsigned int bytes_written = 0;
1646 unsigned int total_written;
1647 struct cifs_sb_info *cifs_sb;
1648 struct cifs_tcon *tcon;
1649 struct TCP_Server_Info *server;
1651 struct dentry *dentry = open_file->dentry;
1652 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1653 struct cifs_io_parms io_parms;
1655 cifs_sb = CIFS_SB(dentry->d_sb);
1657 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1658 write_size, *offset, dentry);
1660 tcon = tlink_tcon(open_file->tlink);
1661 server = tcon->ses->server;
/* Cannot write at all without a sync_write op for this dialect. */
1663 if (!server->ops->sync_write)
1668 for (total_written = 0; write_size > total_written;
1669 total_written += bytes_written) {
/* Retry loop: handles -EAGAIN from a dropped/reconnected session. */
1671 while (rc == -EAGAIN) {
1675 if (open_file->invalidHandle) {
1676 /* we could deadlock if we called
1677 filemap_fdatawait from here so tell
1678 reopen_file not to flush data to
1680 rc = cifs_reopen_file(open_file, false);
/* Cap each wire write to the retryable size for this server. */
1685 len = min(server->ops->wp_retry_size(dentry->d_inode),
1686 (unsigned int)write_size - total_written);
1687 /* iov[0] is reserved for smb header */
1688 iov[1].iov_base = (char *)write_data + total_written;
1689 iov[1].iov_len = len;
1691 io_parms.tcon = tcon;
1692 io_parms.offset = *offset;
1693 io_parms.length = len;
1694 rc = server->ops->sync_write(xid, &open_file->fid,
1695 &io_parms, &bytes_written, iov, 1);
1697 if (rc || (bytes_written == 0)) {
/* Record progress under i_lock so EOF updates stay consistent. */
1705 spin_lock(&dentry->d_inode->i_lock);
1706 cifs_update_eof(cifsi, *offset, bytes_written);
1707 spin_unlock(&dentry->d_inode->i_lock);
1708 *offset += bytes_written;
1712 cifs_stats_bytes_written(tcon, total_written);
1714 if (total_written > 0) {
1715 spin_lock(&dentry->d_inode->i_lock);
1716 if (*offset > dentry->d_inode->i_size)
1717 i_size_write(dentry->d_inode, *offset);
1718 spin_unlock(&dentry->d_inode->i_lock);
1720 mark_inode_dirty_sync(dentry->d_inode);
1722 return total_written;
/*
 * Find an open, valid, readable handle for this inode, taking a reference
 * on it (caller must cifsFileInfo_put). On multiuser mounts, optionally
 * restrict to handles opened by the current fsuid.
 * NOTE(review): lines are elided in this view.
 */
1725 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1728 struct cifsFileInfo *open_file = NULL;
1729 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1731 /* only filter by fsuid on multiuser mounts */
1732 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1735 spin_lock(&cifs_file_list_lock);
1736 /* we could simply get the first_list_entry since write-only entries
1737 are always at the end of the list but since the first entry might
1738 have a close pending, we go through the whole list */
1739 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1740 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1742 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1743 if (!open_file->invalidHandle) {
1744 /* found a good file */
1745 /* lock it so it will not be closed on us */
1746 cifsFileInfo_get_locked(open_file);
1747 spin_unlock(&cifs_file_list_lock);
1749 } /* else might as well continue, and look for
1750 another, or simply have the caller reopen it
1751 again rather than trying to fix this handle */
1752 } else /* write only file */
1753 break; /* write only files are last so must be done */
1755 spin_unlock(&cifs_file_list_lock);
/*
 * Find a writable handle for this inode, preferring one opened by the
 * current tgid, then any pid, and as a last resort reopening an
 * invalidated handle (bounded by MAX_REOPEN_ATT attempts). Returns a
 * referenced cifsFileInfo (caller puts it) or presumably NULL on failure
 * (return lines elided in this view).
 */
1759 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1762 struct cifsFileInfo *open_file, *inv_file = NULL;
1763 struct cifs_sb_info *cifs_sb;
1764 bool any_available = false;
1766 unsigned int refind = 0;
1768 /* Having a null inode here (because mapping->host was set to zero by
1769 the VFS or MM) should not happen but we had reports of on oops (due to
1770 it being zero) during stress testcases so we need to check for it */
1772 if (cifs_inode == NULL) {
1773 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1778 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1780 /* only filter by fsuid on multiuser mounts */
1781 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1784 spin_lock(&cifs_file_list_lock);
/* Give up after too many reopen attempts to avoid looping forever. */
1786 if (refind > MAX_REOPEN_ATT) {
1787 spin_unlock(&cifs_file_list_lock);
1790 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1791 if (!any_available && open_file->pid != current->tgid)
1793 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1795 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1796 if (!open_file->invalidHandle) {
1797 /* found a good writable file */
1798 cifsFileInfo_get_locked(open_file);
1799 spin_unlock(&cifs_file_list_lock);
/* Remember an invalidated candidate for a reopen attempt later. */
1803 inv_file = open_file;
1807 /* couldn't find useable FH with same pid, try any available */
1808 if (!any_available) {
1809 any_available = true;
1810 goto refind_writable;
1814 any_available = false;
1815 cifsFileInfo_get_locked(inv_file);
1818 spin_unlock(&cifs_file_list_lock);
/* Drop the list lock before the (blocking) reopen on the wire. */
1821 rc = cifs_reopen_file(inv_file, false);
1825 spin_lock(&cifs_file_list_lock);
/* Reopen failed: push this handle to the tail and retry the scan. */
1826 list_move_tail(&inv_file->flist,
1827 &cifs_inode->openFileList);
1828 spin_unlock(&cifs_file_list_lock);
1829 cifsFileInfo_put(inv_file);
1830 spin_lock(&cifs_file_list_lock);
1833 goto refind_writable;
/*
 * Write the [from, to) span of a single page cache page back to the
 * server via cifs_write(), clamping against i_size (racing truncate /
 * no file extension from here). Used by the writepage path.
 * NOTE(review): lines are elided in this view (kunmap/returns missing).
 */
1840 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1842 struct address_space *mapping = page->mapping;
1843 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1846 int bytes_written = 0;
1847 struct inode *inode;
1848 struct cifsFileInfo *open_file;
1850 if (!mapping || !mapping->host)
1853 inode = page->mapping->host;
1855 offset += (loff_t)from;
1856 write_data = kmap(page);
/* Reject out-of-range or inverted spans. */
1859 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1864 /* racing with truncate? */
1865 if (offset > mapping->host->i_size) {
1867 return 0; /* don't care */
1870 /* check to make sure that we are not extending the file */
1871 if (mapping->host->i_size - offset < (loff_t)to)
1872 to = (unsigned)(mapping->host->i_size - offset);
1874 open_file = find_writable_file(CIFS_I(mapping->host), false);
1876 bytes_written = cifs_write(open_file, open_file->pid,
1877 write_data, to - from, &offset);
1878 cifsFileInfo_put(open_file);
1879 /* Does mm or vfs already set times? */
1880 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1881 if ((bytes_written > 0) && (offset))
1883 else if (bytes_written < 0)
1886 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1894 static struct cifs_writedata *
/*
 * Allocate a cifs_writedata for up to @tofind pages and fill its pages
 * array with dirty pages from @mapping starting at *@index, looping
 * find_get_pages_tag() until the array is full or the range is exhausted.
 * *@found_pages accumulates the count. NOTE(review): lines elided.
 */
1895 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1896 pgoff_t end, pgoff_t *index,
1897 unsigned int *found_pages)
1899 unsigned int nr_pages;
1900 struct page **pages;
1901 struct cifs_writedata *wdata;
1903 wdata = cifs_writedata_alloc((unsigned int)tofind,
1904 cifs_writev_complete);
1909 * find_get_pages_tag seems to return a max of 256 on each
1910 * iteration, so we must call it several times in order to
1911 * fill the array or the wsize is effectively limited to
1912 * 256 * PAGE_CACHE_SIZE.
1915 pages = wdata->pages;
1917 nr_pages = find_get_pages_tag(mapping, index,
1918 PAGECACHE_TAG_DIRTY, tofind,
1920 *found_pages += nr_pages;
1923 } while (nr_pages && tofind && *index <= end);
/*
 * From the @found_pages candidates in @wdata, lock and claim the run of
 * consecutive dirty pages eligible for this writeback pass: skip pages
 * that were truncated/re-homed, past @end, non-consecutive, or still
 * under writeback; mark the kept ones writeback and release the rest.
 * Returns (per elided lines) the number of pages kept.
 */
1929 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1930 struct address_space *mapping,
1931 struct writeback_control *wbc,
1932 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1934 unsigned int nr_pages = 0, i;
1937 for (i = 0; i < found_pages; i++) {
1938 page = wdata->pages[i];
1940 * At this point we hold neither mapping->tree_lock nor
1941 * lock on the page itself: the page may be truncated or
1942 * invalidated (changing page->mapping to NULL), or even
1943 * swizzled back from swapper_space to tmpfs file
1949 else if (!trylock_page(page))
1952 if (unlikely(page->mapping != mapping)) {
1957 if (!wbc->range_cyclic && page->index > end) {
1963 if (*next && (page->index != *next)) {
1964 /* Not next consecutive page */
/* For data-integrity writeback we must wait, not skip. */
1969 if (wbc->sync_mode != WB_SYNC_NONE)
1970 wait_on_page_writeback(page);
1972 if (PageWriteback(page) ||
1973 !clear_page_dirty_for_io(page)) {
1979 * This actually clears the dirty bit in the radix tree.
1980 * See cifs_writepage() for more commentary.
1982 set_page_writeback(page);
/* Page wholly beyond EOF: nothing to send for it. */
1983 if (page_offset(page) >= i_size_read(mapping->host)) {
1986 end_page_writeback(page);
1990 wdata->pages[i] = page;
1991 *next = page->index + 1;
1995 /* reset index to refind any pages skipped */
1997 *index = wdata->pages[0]->index + 1;
1999 /* put any pages we aren't going to use */
2000 for (i = nr_pages; i < found_pages; i++) {
2001 page_cache_release(wdata->pages[i]);
2002 wdata->pages[i] = NULL;
/*
 * Fill in the remaining wdata fields (offset, sizes, tail size for the
 * possibly-partial last page), attach a writable file handle, and submit
 * the async write. On failure the pages are unlocked here.
 * NOTE(review): lines are elided in this view.
 */
2009 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2010 struct address_space *mapping, struct writeback_control *wbc)
2013 struct TCP_Server_Info *server;
2016 wdata->sync_mode = wbc->sync_mode;
2017 wdata->nr_pages = nr_pages;
2018 wdata->offset = page_offset(wdata->pages[0]);
2019 wdata->pagesz = PAGE_CACHE_SIZE;
/* Last page may extend past EOF - send only the valid tail bytes. */
2020 wdata->tailsz = min(i_size_read(mapping->host) -
2021 page_offset(wdata->pages[nr_pages - 1]),
2022 (loff_t)PAGE_CACHE_SIZE);
2023 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
/* Replace any stale handle from a previous attempt. */
2025 if (wdata->cfile != NULL)
2026 cifsFileInfo_put(wdata->cfile);
2027 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2028 if (!wdata->cfile) {
2029 cifs_dbg(VFS, "No writable handles for inode\n");
2032 wdata->pid = wdata->cfile->pid;
2033 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2034 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2037 for (i = 0; i < nr_pages; ++i)
2038 unlock_page(wdata->pages[i]);
/*
 * address_space ->writepages: batch dirty pages into wsize-sized async
 * writes. Obtains wire credits per batch, gathers/prepares pages, sends
 * them, and on failure redirties or errors the pages. Falls back to
 * generic_writepages() when wsize is below one page.
 * NOTE(review): lines are elided in this view.
 */
2043 static int cifs_writepages(struct address_space *mapping,
2044 struct writeback_control *wbc)
2046 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2047 struct TCP_Server_Info *server;
2048 bool done = false, scanned = false, range_whole = false;
2050 struct cifs_writedata *wdata;
2054 * If wsize is smaller than the page cache size, default to writing
2055 * one page at a time via cifs_writepage
2057 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
2058 return generic_writepages(mapping, wbc);
2060 if (wbc->range_cyclic) {
2061 index = mapping->writeback_index; /* Start from prev offset */
2064 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2065 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2066 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2070 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2072 while (!done && index <= end) {
2073 unsigned int i, nr_pages, found_pages, wsize, credits;
2074 pgoff_t next = 0, tofind, saved_index = index;
/* Block until the server grants credits for a wsize write. */
2076 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2081 tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
2083 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2087 add_credits_and_wake_if(server, credits, 0);
/* Nothing dirty found: return credits and stop this batch. */
2091 if (found_pages == 0) {
2092 kref_put(&wdata->refcount, cifs_writedata_release);
2093 add_credits_and_wake_if(server, credits, 0);
2097 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2098 end, &index, &next, &done);
2100 /* nothing to write? */
2101 if (nr_pages == 0) {
2102 kref_put(&wdata->refcount, cifs_writedata_release);
2103 add_credits_and_wake_if(server, credits, 0);
2107 wdata->credits = credits;
2109 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2111 /* send failure -- clean up the mess */
2113 add_credits_and_wake_if(server, wdata->credits, 0);
2114 for (i = 0; i < nr_pages; ++i) {
/* -EAGAIN: page stays dirty for retry; otherwise mark error. */
2116 redirty_page_for_writepage(wbc,
2119 SetPageError(wdata->pages[i]);
2120 end_page_writeback(wdata->pages[i]);
2121 page_cache_release(wdata->pages[i]);
2124 mapping_set_error(mapping, rc);
2126 kref_put(&wdata->refcount, cifs_writedata_release);
/* Data-integrity sync must retry the same range on -EAGAIN. */
2128 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2129 index = saved_index;
2133 wbc->nr_to_write -= nr_pages;
2134 if (wbc->nr_to_write <= 0)
2140 if (!scanned && !done) {
2142 * We hit the last page and there is more work to be done: wrap
2143 * back to the start of the file
2150 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2151 mapping->writeback_index = index;
/*
 * Write back one locked page via cifs_partialpagewrite(). Retries on
 * -EAGAIN for WB_SYNC_ALL, otherwise redirties the page for later.
 * NOTE(review): lines are elided in this view.
 */
2157 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2163 /* BB add check for wbc flags */
2164 page_cache_get(page);
2165 if (!PageUptodate(page))
2166 cifs_dbg(FYI, "ppw - page not up to date\n");
2169 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2171 * A writepage() implementation always needs to do either this,
2172 * or re-dirty the page with "redirty_page_for_writepage()" in
2173 * the case of a failure.
2175 * Just unlocking the page will cause the radix tree tag-bits
2176 * to fail to update with the state of the page correctly.
2178 set_page_writeback(page);
2180 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2181 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2183 else if (rc == -EAGAIN)
2184 redirty_page_for_writepage(wbc, page)
2188 SetPageUptodate(page);
2189 end_page_writeback(page);
2190 page_cache_release(page);
/* ->writepage: delegate to cifs_writepage_locked (unlock line elided). */
2195 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2197 int rc = cifs_writepage_locked(page, wbc);
/*
 * address_space ->write_end: finish a buffered write. Uptodate pages are
 * just marked dirty; non-uptodate partial writes are pushed straight to
 * the server with cifs_write() using this file's handle. Updates i_size
 * when the write extends the file. NOTE(review): lines elided.
 */
2202 static int cifs_write_end(struct file *file, struct address_space *mapping,
2203 loff_t pos, unsigned len, unsigned copied,
2204 struct page *page, void *fsdata)
2207 struct inode *inode = mapping->host;
2208 struct cifsFileInfo *cfile = file->private_data;
2209 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
/* Forward the opener's pid when the mount asks for it. */
2212 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2215 pid = current->tgid;
2217 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2220 if (PageChecked(page)) {
2222 SetPageUptodate(page);
2223 ClearPageChecked(page);
2224 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2225 SetPageUptodate(page);
2227 if (!PageUptodate(page)) {
2229 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2233 /* this is probably better than directly calling
2234 partialpage_write since in this function the file handle is
2235 known which we might as well leverage */
2236 /* BB check if anything else missing out of ppw
2237 such as updating last write time */
2238 page_data = kmap(page);
2239 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2240 /* if (rc < 0) should we set writebehind rc? */
2247 set_page_dirty(page);
2251 spin_lock(&inode->i_lock);
2252 if (pos > inode->i_size)
2253 i_size_write(inode, pos);
2254 spin_unlock(&inode->i_lock);
2258 page_cache_release(page);
/*
 * fsync for strict cache mode: flush dirty pages, zap the page cache if
 * we no longer hold a read cache (oplock/lease), then ask the server to
 * flush via server->ops->flush unless the mount disables server sync.
 * NOTE(review): lines are elided in this view.
 */
2263 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2268 struct cifs_tcon *tcon;
2269 struct TCP_Server_Info *server;
2270 struct cifsFileInfo *smbfile = file->private_data;
2271 struct inode *inode = file_inode(file);
2272 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2274 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2277 mutex_lock(&inode->i_mutex);
2281 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
/* No read cache: cached pages may be stale, drop them. */
2284 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2285 rc = cifs_zap_mapping(inode);
2287 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2288 rc = 0; /* don't care about it in fsync */
2292 tcon = tlink_tcon(smbfile->tlink);
2293 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2294 server = tcon->ses->server;
2295 if (server->ops->flush)
2296 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2302 mutex_unlock(&inode->i_mutex);
/*
 * Default fsync: flush the dirty page range, then send a server flush
 * unless CIFS_MOUNT_NOSSYNC. Same as cifs_strict_fsync but without the
 * cache invalidation. NOTE(review): lines are elided in this view.
 */
2306 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2310 struct cifs_tcon *tcon;
2311 struct TCP_Server_Info *server;
2312 struct cifsFileInfo *smbfile = file->private_data;
2313 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2314 struct inode *inode = file->f_mapping->host;
2316 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2319 mutex_lock(&inode->i_mutex);
2323 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2326 tcon = tlink_tcon(smbfile->tlink);
2327 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2328 server = tcon->ses->server;
2329 if (server->ops->flush)
2330 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2336 mutex_unlock(&inode->i_mutex);
2341 * As file closes, flush all cached write data for this inode checking
2342 * for write behind errors.
2344 int cifs_flush(struct file *file, fl_owner_t id)
2346 struct inode *inode = file_inode(file);
/* Only flush if the file was open for write. */
2349 if (file->f_mode & FMODE_WRITE)
2350 rc = filemap_write_and_wait(inode->i_mapping);
2352 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
/*
 * Allocate @num_pages highmem pages into @pages for an uncached write.
 * On allocation failure (per the elided branch) records how many pages
 * were obtained and returns ENOMEM; the trailing loop visible at 2377
 * appears to be cleanup. NOTE(review): lines are elided in this view.
 */
2358 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2363 for (i = 0; i < num_pages; i++) {
2364 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2367 * save number of pages we have already allocated and
2368 * return with ENOMEM error
2377 for (i = 0; i < num_pages; i++)
/*
 * Clamp @len to @wsize (stored via *cur_len, per elided lines) and
 * return the number of pages needed to hold that many bytes.
 */
2384 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2389 clen = min_t(const size_t, len, wsize);
2390 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * kref release for uncached writedata: drop the page references taken
 * for the copy, then fall through to the common release.
 */
2399 cifs_uncached_writedata_release(struct kref *refcount)
2402 struct cifs_writedata *wdata = container_of(refcount,
2403 struct cifs_writedata, refcount);
2405 for (i = 0; i < wdata->nr_pages; i++)
2406 put_page(wdata->pages[i]);
2407 cifs_writedata_release(refcount);
/*
 * Work-queue completion for an uncached async write: advance the cached
 * server EOF / i_size under i_lock, signal the waiter, and drop this
 * work item's reference.
 */
2411 cifs_uncached_writev_complete(struct work_struct *work)
2413 struct cifs_writedata *wdata = container_of(work,
2414 struct cifs_writedata, work);
2415 struct inode *inode = wdata->cfile->dentry->d_inode;
2416 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2418 spin_lock(&inode->i_lock);
2419 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2420 if (cifsi->server_eof > inode->i_size)
2421 i_size_write(inode, cifsi->server_eof);
2422 spin_unlock(&inode->i_lock);
2424 complete(&wdata->done);
2426 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
/*
 * Copy up to *@len bytes from the user iov_iter @from into wdata's pages
 * (page by page). Stops early on a short copy (likely unmapped user
 * memory). Updates *@len to the bytes actually copied and *@num_pages to
 * the pages used; returns -EFAULT (per elided lines) if nothing copied.
 */
2430 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2431 size_t *len, unsigned long *num_pages)
2433 size_t save_len, copied, bytes, cur_len = *len;
2434 unsigned long i, nr_pages = *num_pages;
2437 for (i = 0; i < nr_pages; i++) {
2438 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2439 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2442 * If we didn't copy as much as we expected, then that
2443 * may mean we trod into an unmapped area. Stop copying
2444 * at that point. On the next pass through the big
2445 * loop, we'll likely end up getting a zero-length
2446 * write and bailing out of it.
/* cur_len counts down; the difference is what we actually copied. */
2451 cur_len = save_len - cur_len;
2455 * If we have no data to send, then that probably means that
2456 * the copy above failed altogether. That's most likely because
2457 * the address in the iovec was bogus. Return -EFAULT and let
2458 * the caller free anything we allocated and bail out.
2464 * i + 1 now represents the number of pages we actually used in
2465 * the copy phase above.
/*
 * Break an uncached write of @len bytes at @offset into wsize-sized
 * cifs_writedata chunks: obtain credits, allocate pages, copy user data
 * in, and submit each chunk with server->ops->async_writev. Successful
 * submissions are queued on @wdata_list for the caller to reap; on
 * -EAGAIN the iterator is rewound from @saved_from so the chunk can be
 * resent. NOTE(review): lines are elided in this view.
 */
2472 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2473 struct cifsFileInfo *open_file,
2474 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2478 unsigned long nr_pages, num_pages, i;
2479 struct cifs_writedata *wdata;
2480 struct iov_iter saved_from;
2481 loff_t saved_offset = offset;
2483 struct TCP_Server_Info *server;
2485 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2486 pid = open_file->pid;
2488 pid = current->tgid;
2490 server = tlink_tcon(open_file->tlink)->ses->server;
/* Keep a snapshot of the iterator for -EAGAIN rewind. */
2491 memcpy(&saved_from, from, sizeof(struct iov_iter));
2494 unsigned int wsize, credits;
2496 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2501 nr_pages = get_numpages(wsize, len, &cur_len);
2502 wdata = cifs_writedata_alloc(nr_pages,
2503 cifs_uncached_writev_complete);
2506 add_credits_and_wake_if(server, credits, 0);
2510 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2513 add_credits_and_wake_if(server, credits, 0);
2517 num_pages = nr_pages;
2518 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
/* Copy failed outright: release pages and credits. */
2520 for (i = 0; i < nr_pages; i++)
2521 put_page(wdata->pages[i]);
2523 add_credits_and_wake_if(server, credits, 0);
2528 * Bring nr_pages down to the number of pages we actually used,
2529 * and free any pages that we didn't use.
2531 for ( ; nr_pages > num_pages; nr_pages--)
2532 put_page(wdata->pages[nr_pages - 1]);
2534 wdata->sync_mode = WB_SYNC_ALL;
2535 wdata->nr_pages = nr_pages;
2536 wdata->offset = (__u64)offset;
2537 wdata->cfile = cifsFileInfo_get(open_file);
2539 wdata->bytes = cur_len;
2540 wdata->pagesz = PAGE_SIZE;
2541 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2542 wdata->credits = credits;
/* Reopen a stale handle before submitting, if needed. */
2544 if (!wdata->cfile->invalidHandle ||
2545 !cifs_reopen_file(wdata->cfile, false))
2546 rc = server->ops->async_writev(wdata,
2547 cifs_uncached_writedata_release);
2549 add_credits_and_wake_if(server, wdata->credits, 0);
2550 kref_put(&wdata->refcount,
2551 cifs_uncached_writedata_release);
2552 if (rc == -EAGAIN) {
/* Rewind the iterator to this chunk's start and retry. */
2553 memcpy(from, &saved_from,
2554 sizeof(struct iov_iter));
2555 iov_iter_advance(from, offset - saved_offset);
2561 list_add_tail(&wdata->list, wdata_list);
/*
 * Top half of the uncached write path: validate with
 * generic_write_checks(), fan the data out via cifs_write_from_iter(),
 * then reap completions in offset order, resending any chunk that
 * finished with -EAGAIN. Returns bytes written, or the first error if
 * nothing was written. NOTE(review): lines are elided in this view.
 */
2570 cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2573 ssize_t total_written = 0;
2574 struct cifsFileInfo *open_file;
2575 struct cifs_tcon *tcon;
2576 struct cifs_sb_info *cifs_sb;
2577 struct cifs_writedata *wdata, *tmp;
2578 struct list_head wdata_list;
2579 struct iov_iter saved_from;
2582 len = iov_iter_count(from);
2583 rc = generic_write_checks(file, poffset, &len, 0);
2590 iov_iter_truncate(from, len);
2592 INIT_LIST_HEAD(&wdata_list);
2593 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2594 open_file = file->private_data;
2595 tcon = tlink_tcon(open_file->tlink);
/* This path requires an async writev implementation. */
2597 if (!tcon->ses->server->ops->async_writev)
2600 memcpy(&saved_from, from, sizeof(struct iov_iter));
2602 rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
2606 * If at least one write was successfully sent, then discard any rc
2607 * value from the later writes. If the other write succeeds, then
2608 * we'll end up returning whatever was written. If it fails, then
2609 * we'll get a new rc value from that.
2611 if (!list_empty(&wdata_list))
2615 * Wait for and collect replies for any successful sends in order of
2616 * increasing offset. Once an error is hit or we get a fatal signal
2617 * while waiting, then return without waiting for any more replies.
2620 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2622 /* FIXME: freezable too? */
2623 rc = wait_for_completion_killable(&wdata->done);
2626 else if (wdata->result)
2629 total_written += wdata->bytes;
2631 /* resend call if it's a retryable error */
2632 if (rc == -EAGAIN) {
2633 struct list_head tmp_list;
2634 struct iov_iter tmp_from;
2636 INIT_LIST_HEAD(&tmp_list);
2637 list_del_init(&wdata->list);
/* Rebuild an iterator positioned at the failed chunk's offset. */
2639 memcpy(&tmp_from, &saved_from,
2640 sizeof(struct iov_iter));
2641 iov_iter_advance(&tmp_from,
2642 wdata->offset - *poffset);
2644 rc = cifs_write_from_iter(wdata->offset,
2645 wdata->bytes, &tmp_from,
2646 open_file, cifs_sb, &tmp_list);
2648 list_splice(&tmp_list, &wdata_list);
2650 kref_put(&wdata->refcount,
2651 cifs_uncached_writedata_release);
2655 list_del_init(&wdata->list);
2656 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2659 if (total_written > 0)
2660 *poffset += total_written;
2662 cifs_stats_bytes_written(tcon, total_written);
2663 return total_written ? total_written : (ssize_t)rc;
/*
 * ->write_iter for the uncached path: delegate to cifs_iovec_write()
 * and, after a successful write, mark the page cache invalid so later
 * cached reads refetch. NOTE(review): lines are elided in this view.
 */
2666 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2669 struct inode *inode;
2670 loff_t pos = iocb->ki_pos;
2672 inode = file_inode(iocb->ki_filp);
2675 * BB - optimize the way when signing is disabled. We can drop this
2676 * extra memory-to-memory copying and use iovec buffers for constructing
2680 written = cifs_iovec_write(iocb->ki_filp, from, &pos);
2682 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
/*
 * Cached write with mandatory-lock awareness: under lock_sem (read) and
 * i_mutex, refuse the write (-EACCES) if an exclusive brlock conflicts
 * with the target range, otherwise do a generic buffered write and sync
 * it. O_APPEND checks conflicts at the current EOF.
 * NOTE(review): lines are elided in this view.
 */
2690 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2692 struct file *file = iocb->ki_filp;
2693 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2694 struct inode *inode = file->f_mapping->host;
2695 struct cifsInodeInfo *cinode = CIFS_I(inode);
2696 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2697 ssize_t rc = -EACCES;
2698 loff_t lock_pos = iocb->ki_pos;
2701 * We need to hold the sem to be sure nobody modifies lock list
2702 * with a brlock that prevents writing.
2704 down_read(&cinode->lock_sem);
2705 mutex_lock(&inode->i_mutex);
2706 if (file->f_flags & O_APPEND)
2707 lock_pos = i_size_read(inode);
2708 if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
2709 server->vals->exclusive_lock_type, NULL,
2711 rc = __generic_file_write_iter(iocb, from);
2712 mutex_unlock(&inode->i_mutex);
/* Flush the just-written range, per ->write_iter convention. */
2717 err = generic_write_sync(file, iocb->ki_pos - rc, rc);
2722 mutex_unlock(&inode->i_mutex);
2724 up_read(&cinode->lock_sem);
/*
 * Strict-cache write entry point (cache=strict). Chooses the write path
 * based on oplock/caching state:
 *  - write-caching oplock held: cached write (generic path if POSIX brlocks
 *    apply, otherwise cifs_writev() with mandatory-lock checking);
 *  - no write oplock: uncached write via cifs_user_writev().
 * cifs_get_writer()/cifs_put_writer() bracket the operation so an oplock
 * break cannot race with the write.
 */
2729 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2731 struct inode *inode = file_inode(iocb->ki_filp);
2732 struct cifsInodeInfo *cinode = CIFS_I(inode);
2733 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2734 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2735 iocb->ki_filp->private_data;
2736 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2739 written = cifs_get_writer(cinode);
2743 if (CIFS_CACHE_WRITE(cinode)) {
/* POSIX (unix extensions) locking: server handles byte-range locks,
 * so the plain generic cached write is safe. */
2744 if (cap_unix(tcon->ses) &&
2745 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2746 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2747 written = generic_file_write_iter(iocb, from);
2750 written = cifs_writev(iocb, from);
2754 * For non-oplocked files in strict cache mode we need to write the data
2755 * to the server exactly from the pos to pos+len-1 rather than flush all
2756 * affected pages because it may cause an error with mandatory locks on
2757 * these pages but not on the region from pos to pos+len-1.
2759 written = cifs_user_writev(iocb, from);
2760 if (written > 0 && CIFS_CACHE_READ(cinode)) {
2762 * Windows 7 server can delay breaking level2 oplock if a write
2763 * request comes - break it on the client to prevent reading
2766 cifs_zap_mapping(inode);
2767 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2772 cifs_put_writer(cinode);
/*
 * Allocate a zeroed cifs_readdata with room for nr_pages trailing page
 * pointers, and initialize its refcount, list linkage, completion and
 * work item (completion handler 'complete' runs when the read finishes).
 * Returns NULL on allocation failure.
 */
2776 static struct cifs_readdata *
2777 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2779 struct cifs_readdata *rdata;
/* Single allocation: struct plus the page-pointer array tail. */
2781 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2783 if (rdata != NULL) {
2784 kref_init(&rdata->refcount);
2785 INIT_LIST_HEAD(&rdata->list);
2786 init_completion(&rdata->done);
2787 INIT_WORK(&rdata->work, complete);
/*
 * kref release callback for a cifs_readdata: drop the open-file reference
 * taken when the readdata was set up, then free the structure (freeing
 * lines elided in this listing).
 */
2794 cifs_readdata_release(struct kref *refcount)
2796 struct cifs_readdata *rdata = container_of(refcount,
2797 struct cifs_readdata, refcount);
2800 cifsFileInfo_put(rdata->cfile);
/*
 * Populate rdata->pages[0..nr_pages) with freshly allocated highmem-capable
 * pages. On allocation failure the second loop releases every slot
 * (put_page() on a NULL-safe basis per the elided break/cleanup path) and
 * NULLs the array so the caller can kref_put() the rdata safely.
 */
2806 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2812 for (i = 0; i < nr_pages; i++) {
2813 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2818 rdata->pages[i] = page;
/* Error path: undo the allocations made so far. */
2822 for (i = 0; i < nr_pages; i++) {
2823 put_page(rdata->pages[i]);
2824 rdata->pages[i] = NULL;
/*
 * kref release callback for an *uncached* read: unlike the pagecache path,
 * the pages here are private to the readdata, so drop each page reference
 * before delegating to the common cifs_readdata_release().
 */
2831 cifs_uncached_readdata_release(struct kref *refcount)
2833 struct cifs_readdata *rdata = container_of(refcount,
2834 struct cifs_readdata, refcount);
2837 for (i = 0; i < rdata->nr_pages; i++) {
2838 put_page(rdata->pages[i]);
2839 rdata->pages[i] = NULL;
2841 cifs_readdata_release(refcount);
2845 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2846 * @rdata: the readdata response with list of pages holding data
2847 * @iter: destination for our data
2849 * This function copies data from a list of pages in a readdata response into
2850 * an array of iovecs. It will first calculate where the data should go
2851 * based on the info in the readdata and then copy the data into that spot.
/*
 * Copy rdata->got_bytes of received data from the readdata's pages into
 * the destination iterator. Stops early only when the iterator cannot
 * accept a full copy while it still has room (a genuine copy failure).
 * Returns 0 when everything was copied, -EFAULT otherwise.
 */
2854 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2856 size_t remaining = rdata->got_bytes;
2859 for (i = 0; i < rdata->nr_pages; i++) {
2860 struct page *page = rdata->pages[i];
2861 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2862 size_t written = copy_page_to_iter(page, 0, copy, iter);
2863 remaining -= written;
/* Short copy with space left in the iter => fault; bail out. */
2864 if (written < copy && iov_iter_count(iter) > 0)
2867 return remaining ? -EFAULT : 0;
/*
 * Work-queue completion handler for an uncached async read: wake the
 * waiter in cifs_user_readv() and drop the reference held by the I/O.
 */
2871 cifs_uncached_readv_complete(struct work_struct *work)
2873 struct cifs_readdata *rdata = container_of(work,
2874 struct cifs_readdata, work);
2876 complete(&rdata->done);
2877 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * Receive 'len' bytes of read response from the server socket into the
 * readdata's pages, one page per socket read. A final partial page is
 * zero-padded past 'len' and recorded in rdata->tailsz; pages beyond the
 * data are released early ("no need to hold page hostage").
 * Returns bytes received, or the socket error if nothing usable arrived
 * (-ECONNABORTED always propagates so the caller can retry).
 */
2881 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2882 struct cifs_readdata *rdata, unsigned int len)
2886 unsigned int nr_pages = rdata->nr_pages;
2889 rdata->got_bytes = 0;
2890 rdata->tailsz = PAGE_SIZE;
2891 for (i = 0; i < nr_pages; i++) {
2892 struct page *page = rdata->pages[i];
2894 if (len >= PAGE_SIZE) {
2895 /* enough data to fill the page */
2896 iov.iov_base = kmap(page);
2897 iov.iov_len = PAGE_SIZE;
2898 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2899 i, iov.iov_base, iov.iov_len);
2901 } else if (len > 0) {
2902 /* enough for partial page, fill and zero the rest */
2903 iov.iov_base = kmap(page);
2905 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2906 i, iov.iov_base, iov.iov_len);
/* Zero the tail so no stale data leaks to the reader. */
2907 memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2908 rdata->tailsz = len;
2911 /* no need to hold page hostage */
2912 rdata->pages[i] = NULL;
2918 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2923 rdata->got_bytes += result;
2926 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2927 rdata->got_bytes : result;
/*
 * Split an uncached read of [offset, offset+len) into rsize-bounded async
 * read requests. For each chunk: reserve MTU credits, allocate a readdata
 * plus pages, fill in the request parameters, and dispatch via the
 * server's async_readv op. Successfully issued requests are queued on
 * rdata_list for the caller to wait on; on dispatch failure the credits
 * are returned and the readdata reference dropped.
 */
2931 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2932 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2934 struct cifs_readdata *rdata;
2935 unsigned int npages, rsize, credits;
2939 struct TCP_Server_Info *server;
2941 server = tlink_tcon(open_file->tlink)->ses->server;
/* Forward the original opener's pid when the mount requests it. */
2943 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2944 pid = open_file->pid;
2946 pid = current->tgid;
/* Block until the server grants credits for up to rsize bytes. */
2949 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2954 cur_len = min_t(const size_t, len, rsize);
2955 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2957 /* allocate a readdata struct */
2958 rdata = cifs_readdata_alloc(npages,
2959 cifs_uncached_readv_complete);
2961 add_credits_and_wake_if(server, credits, 0);
2966 rc = cifs_read_allocate_pages(rdata, npages);
2970 rdata->cfile = cifsFileInfo_get(open_file);
2971 rdata->nr_pages = npages;
2972 rdata->offset = offset;
2973 rdata->bytes = cur_len;
2975 rdata->pagesz = PAGE_SIZE;
2976 rdata->read_into_pages = cifs_uncached_read_into_pages;
2977 rdata->credits = credits;
/* Reopen a stale handle before dispatch; send only if usable. */
2979 if (!rdata->cfile->invalidHandle ||
2980 !cifs_reopen_file(rdata->cfile, true))
2981 rc = server->ops->async_readv(rdata);
2984 add_credits_and_wake_if(server, rdata->credits, 0);
2985 kref_put(&rdata->refcount,
2986 cifs_uncached_readdata_release);
2992 list_add_tail(&rdata->list, rdata_list);
/*
 * Uncached read entry point. Issues async read requests covering the
 * whole iov via cifs_send_async_read(), then walks the completion list in
 * offset order: copying received data into 'to', resending the unread
 * remainder of any -EAGAIN chunk (reconnect), and dropping each readdata
 * when done. Returns bytes read, or the first error when nothing was read.
 */
3000 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3002 struct file *file = iocb->ki_filp;
3005 ssize_t total_read = 0;
3006 loff_t offset = iocb->ki_pos;
3007 struct cifs_sb_info *cifs_sb;
3008 struct cifs_tcon *tcon;
3009 struct cifsFileInfo *open_file;
3010 struct cifs_readdata *rdata, *tmp;
3011 struct list_head rdata_list;
3013 len = iov_iter_count(to);
3017 INIT_LIST_HEAD(&rdata_list);
3018 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3019 open_file = file->private_data;
3020 tcon = tlink_tcon(open_file->tlink);
/* This path requires async read support from the protocol ops. */
3022 if (!tcon->ses->server->ops->async_readv)
3025 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3026 cifs_dbg(FYI, "attempting read on write only file instance\n");
3028 rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3030 /* if at least one read request send succeeded, then reset rc */
3031 if (!list_empty(&rdata_list))
3034 len = iov_iter_count(to);
3035 /* the loop below should proceed in the order of increasing offsets */
3037 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3039 /* FIXME: freezable sleep too? */
3040 rc = wait_for_completion_killable(&rdata->done);
3043 else if (rdata->result == -EAGAIN) {
3044 /* resend call if it's a retryable error */
3045 struct list_head tmp_list;
3046 unsigned int got_bytes = rdata->got_bytes;
3048 list_del_init(&rdata->list);
3049 INIT_LIST_HEAD(&tmp_list);
3052 * Got a part of data and then reconnect has
3053 * happened -- fill the buffer and continue
3056 if (got_bytes && got_bytes < rdata->bytes) {
3057 rc = cifs_readdata_to_iov(rdata, to);
3059 kref_put(&rdata->refcount,
3060 cifs_uncached_readdata_release);
/* Reissue only the portion that was not received. */
3065 rc = cifs_send_async_read(
3066 rdata->offset + got_bytes,
3067 rdata->bytes - got_bytes,
3068 rdata->cfile, cifs_sb,
3071 list_splice(&tmp_list, &rdata_list);
3073 kref_put(&rdata->refcount,
3074 cifs_uncached_readdata_release);
3076 } else if (rdata->result)
3079 rc = cifs_readdata_to_iov(rdata, to);
3081 /* if there was a short read -- discard anything left */
3082 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3085 list_del_init(&rdata->list);
3086 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/* Bytes consumed from 'to' is the amount actually read. */
3089 total_read = len - iov_iter_count(to);
3091 cifs_stats_bytes_read(tcon, total_read);
3093 /* mask nodata case */
3098 iocb->ki_pos += total_read;
/*
 * Strict-cache read entry point. Without a read-caching oplock, always go
 * to the server (uncached). With POSIX brlock support, the generic cached
 * read is safe. Otherwise take lock_sem shared and do the cached read only
 * if no mandatory lock conflicts with the requested range.
 */
3105 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3107 struct inode *inode = file_inode(iocb->ki_filp);
3108 struct cifsInodeInfo *cinode = CIFS_I(inode);
3109 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3110 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3111 iocb->ki_filp->private_data;
3112 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3116 * In strict cache mode we need to read from the server all the time
3117 * if we don't have level II oplock because the server can delay mtime
3118 * change - so we can't make a decision about inode invalidating.
3119 * And we can also fail with pagereading if there are mandatory locks
3120 * on pages affected by this read but not on the region from pos to
3123 if (!CIFS_CACHE_READ(cinode))
3124 return cifs_user_readv(iocb, to);
3126 if (cap_unix(tcon->ses) &&
3127 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3128 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3129 return generic_file_read_iter(iocb, to);
3132 * We need to hold the sem to be sure nobody modifies lock list
3133 * with a brlock that prevents reading.
3135 down_read(&cinode->lock_sem);
3136 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3137 tcon->ses->server->vals->shared_lock_type,
3138 NULL, CIFS_READ_OP))
3139 rc = generic_file_read_iter(iocb, to);
3140 up_read(&cinode->lock_sem);
/*
 * Synchronous (non-async) read of read_size bytes at *offset into
 * read_data, looping in rsize-bounded chunks through the protocol's
 * sync_read op and retrying -EAGAIN after reopening a stale handle.
 * Advances *offset by the bytes read and updates read statistics.
 */
3145 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3148 unsigned int bytes_read = 0;
3149 unsigned int total_read;
3150 unsigned int current_read_size;
3152 struct cifs_sb_info *cifs_sb;
3153 struct cifs_tcon *tcon;
3154 struct TCP_Server_Info *server;
3157 struct cifsFileInfo *open_file;
3158 struct cifs_io_parms io_parms;
3159 int buf_type = CIFS_NO_BUFFER;
3163 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3165 /* FIXME: set up handlers for larger reads and/or convert to async */
3166 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3168 if (file->private_data == NULL) {
3173 open_file = file->private_data;
3174 tcon = tlink_tcon(open_file->tlink);
3175 server = tcon->ses->server;
/* This legacy path needs a synchronous read op from the protocol. */
3177 if (!server->ops->sync_read) {
3182 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3183 pid = open_file->pid;
3185 pid = current->tgid;
3187 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3188 cifs_dbg(FYI, "attempting read on write only file instance\n");
3190 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3191 total_read += bytes_read, cur_offset += bytes_read) {
3193 current_read_size = min_t(uint, read_size - total_read,
3196 * For windows me and 9x we do not want to request more
3197 * than it negotiated since it will refuse the read
3200 if ((tcon->ses) && !(tcon->ses->capabilities &
3201 tcon->ses->server->vals->cap_large_files)) {
3202 current_read_size = min_t(uint,
3203 current_read_size, CIFSMaxBufSize);
3205 if (open_file->invalidHandle) {
3206 rc = cifs_reopen_file(open_file, true);
3211 io_parms.tcon = tcon;
3212 io_parms.offset = *offset;
3213 io_parms.length = current_read_size;
3214 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3215 &bytes_read, &cur_offset,
3217 } while (rc == -EAGAIN);
/* Stop on error or EOF (zero-byte read). */
3219 if (rc || (bytes_read == 0)) {
3227 cifs_stats_bytes_read(tcon, total_read);
3228 *offset += bytes_read;
3236 * If the page is mmap'ed into a process' page tables, then we need to make
3237 * sure that it doesn't change while being written back.
/*
 * vm_ops->page_mkwrite handler: the page is (re)locked and reported as
 * VM_FAULT_LOCKED so writeback cannot race with the process dirtying it.
 */
3240 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3242 struct page *page = vmf->page;
3245 return VM_FAULT_LOCKED;
3248 static struct vm_operations_struct cifs_file_vm_ops = {
3249 .fault = filemap_fault,
3250 .map_pages = filemap_map_pages,
3251 .page_mkwrite = cifs_page_mkwrite,
3252 .remap_pages = generic_file_remap_pages,
/*
 * mmap for strict cache mode: if we hold no read-caching oplock, zap the
 * pagecache first so mapped reads refetch from the server, then fall
 * through to the generic mmap and install the CIFS vm_ops.
 */
3255 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3258 struct inode *inode = file_inode(file);
3262 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3263 rc = cifs_zap_mapping(inode);
3268 rc = generic_file_mmap(file, vma);
3270 vma->vm_ops = &cifs_file_vm_ops;
/*
 * Default mmap: revalidate the file (failure is logged but, per the elided
 * control flow, may not abort — verify against the full file), then do the
 * generic mmap and install the CIFS vm_ops.
 */
3275 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3280 rc = cifs_revalidate_file(file);
3282 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3287 rc = generic_file_mmap(file, vma);
3289 vma->vm_ops = &cifs_file_vm_ops;
/*
 * Completion handler for a pagecache (readpages) async read: for each
 * page, add it to the file LRU, mark it uptodate if the read succeeded
 * (or a retryable -EAGAIN delivered some bytes), push it to fscache when
 * valid, then release the readdata's page reference.
 */
3295 cifs_readv_complete(struct work_struct *work)
3297 unsigned int i, got_bytes;
3298 struct cifs_readdata *rdata = container_of(work,
3299 struct cifs_readdata, work);
3301 got_bytes = rdata->got_bytes;
3302 for (i = 0; i < rdata->nr_pages; i++) {
3303 struct page *page = rdata->pages[i];
3305 lru_cache_add_file(page);
3307 if (rdata->result == 0 ||
3308 (rdata->result == -EAGAIN && got_bytes)) {
3309 flush_dcache_page(page);
3310 SetPageUptodate(page);
3315 if (rdata->result == 0 ||
3316 (rdata->result == -EAGAIN && got_bytes))
3317 cifs_readpage_to_fscache(rdata->mapping->host, page);
/* Consume up to one page's worth of the received byte count. */
3319 got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
3321 page_cache_release(page);
3322 rdata->pages[i] = NULL;
3324 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * Receive a readpages response from the socket into the readdata's
 * pagecache pages. Full pages are received whole; the final partial page
 * is zero-padded (tailsz records the valid bytes); pages past the
 * server's probable EOF are zero-filled and marked uptodate so the VFS
 * stops re-requesting them; remaining pages are released.
 * Returns bytes received, or the socket error if nothing usable arrived
 * (-ECONNABORTED always propagates).
 */
3328 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3329 struct cifs_readdata *rdata, unsigned int len)
3335 unsigned int nr_pages = rdata->nr_pages;
3338 /* determine the eof that the server (probably) has */
3339 eof = CIFS_I(rdata->mapping->host)->server_eof;
3340 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3341 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3343 rdata->got_bytes = 0;
3344 rdata->tailsz = PAGE_CACHE_SIZE;
3345 for (i = 0; i < nr_pages; i++) {
3346 struct page *page = rdata->pages[i];
3348 if (len >= PAGE_CACHE_SIZE) {
3349 /* enough data to fill the page */
3350 iov.iov_base = kmap(page);
3351 iov.iov_len = PAGE_CACHE_SIZE;
3352 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3353 i, page->index, iov.iov_base, iov.iov_len);
3354 len -= PAGE_CACHE_SIZE;
3355 } else if (len > 0) {
3356 /* enough for partial page, fill and zero the rest */
3357 iov.iov_base = kmap(page);
3359 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3360 i, page->index, iov.iov_base, iov.iov_len);
3361 memset(iov.iov_base + len,
3362 '\0', PAGE_CACHE_SIZE - len);
3363 rdata->tailsz = len;
3365 } else if (page->index > eof_index) {
3367 * The VFS will not try to do readahead past the
3368 * i_size, but it's possible that we have outstanding
3369 * writes with gaps in the middle and the i_size hasn't
3370 * caught up yet. Populate those with zeroed out pages
3371 * to prevent the VFS from repeatedly attempting to
3372 * fill them until the writes are flushed.
3374 zero_user(page, 0, PAGE_CACHE_SIZE);
3375 lru_cache_add_file(page);
3376 flush_dcache_page(page);
3377 SetPageUptodate(page);
3379 page_cache_release(page);
3380 rdata->pages[i] = NULL;
3384 /* no need to hold page hostage */
3385 lru_cache_add_file(page);
3387 page_cache_release(page);
3388 rdata->pages[i] = NULL;
3393 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3398 rdata->got_bytes += result;
3401 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3402 rdata->got_bytes : result;
/*
 * Build one contiguous batch for cifs_readpages(): take the last entry of
 * page_list (highest index is at the tail, list is in declining index
 * order), insert it into the pagecache locked, then keep pulling
 * consecutive-index pages onto tmplist until an index gap, the rsize
 * limit, or list exhaustion. Outputs the batch's starting *offset and
 * total *bytes (and, per the elided lines, *nr_pages).
 */
3406 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3407 unsigned int rsize, struct list_head *tmplist,
3408 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3410 struct page *page, *tpage;
3411 unsigned int expected_index;
3414 INIT_LIST_HEAD(tmplist);
3416 page = list_entry(page_list->prev, struct page, lru);
3419 * Lock the page and put it in the cache. Since no one else
3420 * should have access to this page, we're safe to simply set
3421 * PG_locked without checking it first.
3423 __set_page_locked(page);
3424 rc = add_to_page_cache_locked(page, mapping,
3425 page->index, GFP_KERNEL);
3427 /* give up if we can't stick it in the cache */
3429 __clear_page_locked(page);
3433 /* move first page to the tmplist */
3434 *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3435 *bytes = PAGE_CACHE_SIZE;
3437 list_move_tail(&page->lru, tmplist);
3439 /* now try and add more pages onto the request */
3440 expected_index = page->index + 1;
3441 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3442 /* discontinuity ? */
3443 if (page->index != expected_index)
3446 /* would this page push the read over the rsize? */
3447 if (*bytes + PAGE_CACHE_SIZE > rsize)
3450 __set_page_locked(page);
3451 if (add_to_page_cache_locked(page, mapping, page->index,
3453 __clear_page_locked(page);
3456 list_move_tail(&page->lru, tmplist);
3457 (*bytes) += PAGE_CACHE_SIZE;
/*
 * address_space_operations.readpages: try fscache first, then repeatedly
 * carve the remaining page_list into contiguous, rsize-bounded batches
 * (readpages_get_pages) and dispatch each as one async read. On any
 * failure the batch's pages are put back on the LRU and released so the
 * VFS falls back to single-page readpage. Pages shown to fscache but
 * never issued are uncancelled at the end.
 */
3464 static int cifs_readpages(struct file *file, struct address_space *mapping,
3465 struct list_head *page_list, unsigned num_pages)
3468 struct list_head tmplist;
3469 struct cifsFileInfo *open_file = file->private_data;
3470 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3471 struct TCP_Server_Info *server;
3475 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3476 * immediately if the cookie is negative
3478 * After this point, every page in the list might have PG_fscache set,
3479 * so we will need to clean that up off of every page we don't use.
3481 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3486 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3487 pid = open_file->pid;
3489 pid = current->tgid;
3492 server = tlink_tcon(open_file->tlink)->ses->server;
3494 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3495 __func__, file, mapping, num_pages);
3498 * Start with the page at end of list and move it to private
3499 * list. Do the same with any following pages until we hit
3500 * the rsize limit, hit an index discontinuity, or run out of
3501 * pages. Issue the async read and then start the loop again
3502 * until the list is empty.
3504 * Note that list order is important. The page_list is in
3505 * the order of declining indexes. When we put the pages in
3506 * the rdata->pages, then we want them in increasing order.
3508 while (!list_empty(page_list)) {
3509 unsigned int i, nr_pages, bytes, rsize;
3511 struct page *page, *tpage;
3512 struct cifs_readdata *rdata;
/* Reserve credits; the granted rsize bounds this batch. */
3515 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3521 * Give up immediately if rsize is too small to read an entire
3522 * page. The VFS will fall back to readpage. We should never
3523 * reach this point however since we set ra_pages to 0 when the
3524 * rsize is smaller than a cache page.
3526 if (unlikely(rsize < PAGE_CACHE_SIZE)) {
3527 add_credits_and_wake_if(server, credits, 0);
3531 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3532 &nr_pages, &offset, &bytes);
3534 add_credits_and_wake_if(server, credits, 0);
3538 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3540 /* best to give up if we're out of mem */
3541 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3542 list_del(&page->lru);
3543 lru_cache_add_file(page);
3545 page_cache_release(page);
3548 add_credits_and_wake_if(server, credits, 0);
3552 rdata->cfile = cifsFileInfo_get(open_file);
3553 rdata->mapping = mapping;
3554 rdata->offset = offset;
3555 rdata->bytes = bytes;
3557 rdata->pagesz = PAGE_CACHE_SIZE;
3558 rdata->read_into_pages = cifs_readpages_read_into_pages;
3559 rdata->credits = credits;
/* tmplist is ascending here, so pages land in increasing order. */
3561 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3562 list_del(&page->lru);
3563 rdata->pages[rdata->nr_pages++] = page;
3566 if (!rdata->cfile->invalidHandle ||
3567 !cifs_reopen_file(rdata->cfile, true))
3568 rc = server->ops->async_readv(rdata);
/* Dispatch failed: return credits and unwind this batch's pages. */
3570 add_credits_and_wake_if(server, rdata->credits, 0);
3571 for (i = 0; i < rdata->nr_pages; i++) {
3572 page = rdata->pages[i];
3573 lru_cache_add_file(page);
3575 page_cache_release(page);
3577 /* Fallback to the readpage in error/reconnect cases */
3578 kref_put(&rdata->refcount, cifs_readdata_release);
3582 kref_put(&rdata->refcount, cifs_readdata_release);
3585 /* Any pages that have been shown to fscache but didn't get added to
3586 * the pagecache must be uncached before they get returned to the
3589 cifs_fscache_readpages_cancel(mapping->host, page_list);
3594 * cifs_readpage_worker must be called with the page pinned
/*
 * Fill one locked pagecache page: try fscache first; on miss, do a
 * synchronous cifs_read() of a full page at *poffset, update atime,
 * zero any tail past the bytes read, mark the page uptodate, and push it
 * to fscache. Caller must hold a reference on the page.
 */
3596 static int cifs_readpage_worker(struct file *file, struct page *page,
3602 /* Is the page cached? */
3603 rc = cifs_readpage_from_fscache(file_inode(file), page);
3607 read_data = kmap(page);
3608 /* for reads over a certain size could initiate async read ahead */
3610 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3615 cifs_dbg(FYI, "Bytes read %d\n", rc);
3617 file_inode(file)->i_atime =
3618 current_fs_time(file_inode(file)->i_sb);
/* Short read: zero the rest so no stale data is exposed. */
3620 if (PAGE_CACHE_SIZE > rc)
3621 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3623 flush_dcache_page(page);
3624 SetPageUptodate(page);
3626 /* send this page to the cache */
3627 cifs_readpage_to_fscache(file_inode(file), page);
/*
 * address_space_operations.readpage: compute the byte offset from the
 * page index and delegate to cifs_readpage_worker(). Fails if the file
 * has no private open-file state.
 */
3639 static int cifs_readpage(struct file *file, struct page *page)
3641 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3647 if (file->private_data == NULL) {
3653 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3654 page, (int)offset, (int)offset);
3656 rc = cifs_readpage_worker(file, page, &offset);
/*
 * Return non-zero if any open file instance on this inode was opened with
 * write access. Walks openFileList under cifs_file_list_lock.
 */
3662 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3664 struct cifsFileInfo *open_file;
3666 spin_lock(&cifs_file_list_lock);
3667 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3668 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3669 spin_unlock(&cifs_file_list_lock);
3673 spin_unlock(&cifs_file_list_lock);
3677 /* We do not want to update the file size from server for inodes
3678 open for write - to avoid races with writepage extending
3679 the file - in the future we could consider allowing
3680 refreshing the inode only on increases in the file size
3681 but this is tricky to do without racing with writebehind
3682 page caching in the current Linux kernel design */
/*
 * Decide whether it is safe to update the cached file size from server
 * data: always safe if the inode is not open for write, or on direct-I/O
 * mounts (no pagecache to corrupt); otherwise only size increases are
 * considered (comparison against the current i_size).
 */
3683 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3688 if (is_inode_writable(cifsInode)) {
3689 /* This inode is open for write at least once */
3690 struct cifs_sb_info *cifs_sb;
3692 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3693 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3694 /* since no page cache to corrupt on directio
3695 we can change size safely */
3699 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * address_space_operations.write_begin: grab (and lock) the target page
 * and decide whether it must be pre-read from the server. The read is
 * skipped when the page is already uptodate, the write covers the whole
 * page, or (with a read oplock) the page lies at/past EOF — in which case
 * the non-written segments are zeroed and PG_checked is set. Otherwise,
 * for readable opens, the page is read in via cifs_readpage_worker().
 */
3707 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3708 loff_t pos, unsigned len, unsigned flags,
3709 struct page **pagep, void **fsdata)
3712 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3713 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3714 loff_t page_start = pos & PAGE_MASK;
3719 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3722 page = grab_cache_page_write_begin(mapping, index, flags);
3728 if (PageUptodate(page))
3732 * If we write a full page it will be up to date, no need to read from
3733 * the server. If the write is short, we'll end up doing a sync write
3736 if (len == PAGE_CACHE_SIZE)
3740 * optimize away the read when we have an oplock, and we're not
3741 * expecting to use any of the data we'd be reading in. That
3742 * is, when the page lies beyond the EOF, or straddles the EOF
3743 * and the write will cover all of the existing data.
3745 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3746 i_size = i_size_read(mapping->host);
3747 if (page_start >= i_size ||
3748 (offset == 0 && (pos + len) >= i_size)) {
3749 zero_user_segments(page, 0, offset,
3753 * PageChecked means that the parts of the page
3754 * to which we're not writing are considered up
3755 * to date. Once the data is copied to the
3756 * page, it can be set uptodate.
3758 SetPageChecked(page);
3763 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3765 * might as well read a page, it is fast enough. If we get
3766 * an error, we don't need to return it. cifs_write_end will
3767 * do a sync write instead since PG_uptodate isn't set.
3769 cifs_readpage_worker(file, page, &page_start);
3770 page_cache_release(page);
3774 /* we could try using another file handle if there is one -
3775 but how would we lock it to prevent close of that handle
3776 racing with this read? In any case
3777 this will be written out by write_end so is fine */
/*
 * releasepage: refuse if the page has private data attached; otherwise
 * let fscache decide whether its reference can be dropped.
 */
3784 static int cifs_release_page(struct page *page, gfp_t gfp)
3786 if (PagePrivate(page))
3789 return cifs_fscache_release_page(page, gfp);
/*
 * invalidatepage: only a full-page invalidation needs to evict the page
 * from fscache; partial invalidations leave the cache object alone.
 */
3792 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3793 unsigned int length)
3795 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3797 if (offset == 0 && length == PAGE_CACHE_SIZE)
3798 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * launder_page: synchronously write back one dirty page (WB_SYNC_ALL over
 * exactly this page's byte range) before it is invalidated, then drop it
 * from fscache.
 */
3801 static int cifs_launder_page(struct page *page)
3804 loff_t range_start = page_offset(page);
3805 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3806 struct writeback_control wbc = {
3807 .sync_mode = WB_SYNC_ALL,
3809 .range_start = range_start,
3810 .range_end = range_end,
3813 cifs_dbg(FYI, "Launder page: %p\n", page);
3815 if (clear_page_dirty_for_io(page))
3816 rc = cifs_writepage_locked(page, &wbc);
3818 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * Deferred oplock-break handler. Waits for in-flight writers, downgrades
 * the local oplock state, drops a level2 oplock entirely when mandatory
 * locks are cached (reads could otherwise return wrong data), breaks any
 * local leases, flushes (and if read caching is lost, waits for and zaps)
 * the pagecache, re-pushes byte-range locks, and finally acknowledges the
 * break to the server unless it was cancelled by reconnect.
 */
3822 void cifs_oplock_break(struct work_struct *work)
3824 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3826 struct inode *inode = cfile->dentry->d_inode;
3827 struct cifsInodeInfo *cinode = CIFS_I(inode);
3828 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3829 struct TCP_Server_Info *server = tcon->ses->server;
/* Let pending writers drain before changing caching state. */
3832 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3833 TASK_UNINTERRUPTIBLE);
3835 server->ops->downgrade_oplock(server, cinode,
3836 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3838 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3839 cifs_has_mand_locks(cinode)) {
3840 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3845 if (inode && S_ISREG(inode->i_mode)) {
3846 if (CIFS_CACHE_READ(cinode))
3847 break_lease(inode, O_RDONLY);
3849 break_lease(inode, O_WRONLY);
3850 rc = filemap_fdatawrite(inode->i_mapping);
3851 if (!CIFS_CACHE_READ(cinode)) {
3852 rc = filemap_fdatawait(inode->i_mapping);
3853 mapping_set_error(inode->i_mapping, rc);
3854 cifs_zap_mapping(inode);
3856 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3859 rc = cifs_push_locks(cfile);
3861 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3864 * releasing stale oplock after recent reconnect of smb session using
3865 * a now incorrect file handle is not a data integrity issue but do
3866 * not bother sending an oplock release if session to server still is
3867 * disconnected since oplock already released by the server
3869 if (!cfile->oplock_break_cancelled) {
3870 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3872 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3874 cifs_done_oplock_break(cinode);
3878 * The presence of cifs_direct_io() in the address space ops vector
3879 * allows open() O_DIRECT flags which would have failed otherwise.
3881 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
3882 * so this method should never be called.
3884 * Direct IO is not yet supported in the cached mode.
/*
 * Stub direct_IO method: exists only so open(O_DIRECT) succeeds (see the
 * comment above). Cached-mode direct I/O is unimplemented; the elided
 * body returns an error.
 */
3887 cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
3892 * Eventually need to support direct IO for non forcedirectio mounts
/*
 * Default address-space operations for CIFS, used when the server buffer
 * is large enough for multi-page reads (includes .readpages; compare the
 * smallbuf variant that omits it).
 */
3898 const struct address_space_operations cifs_addr_ops = {
3899 .readpage = cifs_readpage,
3900 .readpages = cifs_readpages,
3901 .writepage = cifs_writepage,
3902 .writepages = cifs_writepages,
3903 .write_begin = cifs_write_begin,
3904 .write_end = cifs_write_end,
3905 .set_page_dirty = __set_page_dirty_nobuffers,
3906 .releasepage = cifs_release_page,
3907 .direct_IO = cifs_direct_io,
3908 .invalidatepage = cifs_invalidate_page,
3909 .launder_page = cifs_launder_page,
3913 * cifs_readpages requires the server to support a buffer large enough to
3914 * contain the header plus one complete page of data. Otherwise, we need
3915 * to leave cifs_readpages out of the address space operations.
3917 const struct address_space_operations cifs_addr_ops_smallbuf = {
3918 .readpage = cifs_readpage,
3919 .writepage = cifs_writepage,
3920 .writepages = cifs_writepages,
3921 .write_begin = cifs_write_begin,
3922 .write_end = cifs_write_end,
3923 .set_page_dirty = __set_page_dirty_nobuffers,
3924 .releasepage = cifs_release_page,
3925 .invalidatepage = cifs_invalidate_page,
3926 .launder_page = cifs_launder_page,