fs/cifs/file.c
// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Mark all open files on the tree connection as invalid, since they
 * were closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file = NULL;
        struct list_head *tmp;
        struct list_head *tmp1;

        /* only send once per connect */
        spin_lock(&tcon->tc_lock);
        if (tcon->status != TID_NEED_RECON) {
                spin_unlock(&tcon->tc_lock);
                return;
        }
        tcon->status = TID_IN_FILES_INVALIDATE;
        spin_unlock(&tcon->tc_lock);

        /* list all files open on tree connection and mark them invalid */
        spin_lock(&tcon->open_file_lock);
        list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                open_file->invalidHandle = true;
                open_file->oplock_break_cancelled = true;
        }
        spin_unlock(&tcon->open_file_lock);

        invalidate_all_cached_dirs(tcon);
        spin_lock(&tcon->tc_lock);
        if (tcon->status == TID_IN_FILES_INVALIDATE)
                tcon->status = TID_NEED_TCON;
        spin_unlock(&tcon->tc_lock);

        /*
         * BB Add call to invalidate_inodes(sb) for all superblocks mounted
         * to this tcon.
         */
}

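/*
 * Convert the POSIX access mode in the open flags to the desired access
 * bits requested in the SMB open.
 */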
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause unnecessary access-denied errors on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
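/*
 * Convert VFS open flags to the SMB_O_* flags used by the legacy CIFS
 * POSIX open call.
 */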
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

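/*
 * Map the O_CREAT/O_EXCL/O_TRUNC combination in the open flags to an SMB
 * create disposition; see the open flag mapping table in cifs_nt_open().
 */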
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
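/*
 * Open a file using the legacy CIFS POSIX extensions. On success, if
 * @pinode was supplied, instantiate a new inode or refresh the existing
 * one from the FILE_UNIX_BASIC_INFO returned by the server.
 */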
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

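/*
 * Open a file without the POSIX extensions: convert the VFS open flags to
 * an SMB desired access and create disposition, issue the open through
 * the server's ops, then query inode info for the newly opened path.
 */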
static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
                        struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
                        struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct POSIX match for the disposition
 *      FILE_SUPERSEDE (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing file
 *      rather than creating a new one the way FILE_SUPERSEDE does
 *      (FILE_SUPERSEDE uses the attributes / metadata passed in on the
 *      open call).
 *
 *      O_SYNC is a reasonable match for the CIFS writethrough flag, and
 *      the read/write flags match reasonably.  O_LARGEFILE is irrelevant
 *      because large file support is always used by this client.  Flags
 *      O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC, O_NOFOLLOW and
 *      O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = fid,
        };

        rc = server->ops->open(xid, &oparms, oplock, buf);
        if (rc)
                return rc;

        /* TODO: Add support for calling posix query info but with passing in fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

        return rc;
}

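/*
 * Return true if any open handle on this inode holds cached byte-range
 * (mandatory style) locks.
 */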
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

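/*
 * Write-lock @sem by polling down_write_trylock() with a short sleep
 * between attempts rather than blocking in down_write().
 */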
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

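/*
 * Allocate and initialize the per-open-file private data (cifsFileInfo):
 * take references on the dentry, tlink and superblock, apply any oplock
 * that arrived while the open was pending, and link the new handle into
 * the tcon and inode open-file lists.
 */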
struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                                       struct tcon_link *tlink, __u32 oplock,
                                       const char *symlink_target)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        if (symlink_target) {
                cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
                if (!cfile->symlink_target) {
                        kfree(fdlocks);
                        kfree(cfile);
                        return NULL;
                }
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if this is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

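/*
 * Take an extra reference on an open file's private data; drop it with
 * cifsFileInfo_put().
 */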
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

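/*
 * Final stage of releasing an open file's private data once the last
 * reference is gone: free any remaining lock records and drop the dentry,
 * tlink and superblock references taken in cifs_new_fileinfo().
 */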
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        cifs_fscache_release_inode_cookie(inode);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file->symlink_target);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:    if true, queue the final part of the release to a worker
 *              thread; the close and oplock break paths pass false
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

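/*
 * VFS ->open() for regular files: reuse a deferred-close handle when the
 * open flags match, otherwise open via the POSIX extensions (when
 * available) or the regular path, and attach the new cifsFileInfo to the
 * struct file.
 */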
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        struct cifs_open_info_data data = {};

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto out;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Otherwise fall through and retry the open the old way on
                 * network I/O or DFS errors.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
                                  xid, &data);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode, which we could not set earlier due
                 * to problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        cifs_free_open_info(&data);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte-range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

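/*
 * Reopen @cfile after its handle was invalidated, e.g. by a reconnect.
 * If @can_flush is true, flush dirty pages and refresh the inode info,
 * then restore the oplock state and reacquire any byte-range locks.
 */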
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * We cannot grab the rename sem here, because various ops (including
         * some that already hold it) can end up causing writepage to get
         * called, and if the server was down that means we end up here; we
         * can never tell whether the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = &cfile->fid,
                .reconnect = true,
        };

        /*
         * We cannot refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty
         * locally, we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Otherwise we are writing out data to the server already and could
         * deadlock if we tried to flush data, and since we do not know
         * whether we have data that would invalidate the current end of
         * file on the server, we cannot go to the server to get the new
         * inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

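/*
 * Work item that carries out a previously deferred close: remove the
 * deferred-close record and drop the reference held since cifs_close().
 */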
void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

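/*
 * VFS ->release() for regular files. If the handle has a cached RHW lease,
 * defer the real close by closetimeo so a quick reopen can reuse the
 * handle; otherwise drop the reference (and close) immediately.
 */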
int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
                    && cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                                cifs_fscache_update_inode_cookie(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work
                                 * queues new work, so increase the ref count to
                                 * avoid use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

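/*
 * After a reconnect, walk the tcon's open-file list and reopen any
 * invalidated persistent handles; on failure leave need_reopen_files set
 * so that the reopen is retried.
 */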
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file, *tmp;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles  */
        spin_lock(&tcon->open_file_lock);
        list_for_each_entry(open_file, &tcon->openFileList, tlist) {
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

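/*
 * VFS ->release() for directories: close the search handle on the server
 * if it is still open, then free any buffered find data and the private
 * data itself.
 */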
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

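/* Allocate and initialize a byte-range lock record for the current process. */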
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return NULL;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

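/* Wake up all lock requests that are blocked waiting on @lock. */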
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP (no op), CIFS_READ_OP or CIFS_WRITE_OP as above */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

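/*
 * Check every open handle on the inode for a lock conflicting with the
 * given range; the per-handle rules are in cifs_find_fid_lock_conflict().
 */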
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to ask the server, or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

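/* Append @lock to the handle's list of cached byte-range locks. */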
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request it from the server;
 * 2) 1, if no locks prevent us but we need to request it from the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to ask the server, or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurred while setting the lock;
 * 2) 0, if we set the lock and don't need to request it from the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request it from the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

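/*
 * Send all cached byte-range locks for this handle to the server, packing
 * as many LOCKING_ANDX ranges into each request as the negotiated buffer
 * size allows.
 */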
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

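/*
 * Derive the on-the-wire lock owner id by XORing a hash of the file_lock
 * owner pointer with the random cifs_lock_secret.
 */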
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

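/*
 * Snapshot of a POSIX lock, copied under flc_lock so that it can be sent
 * to the server after the spinlock is dropped.
 */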
struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
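/*
 * Replay the inode's cached POSIX locks to the server: count them under
 * flc_lock, preallocate lock_to_push records, copy the lock details, then
 * send them once the spinlock has been dropped.
 */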
1407 static int
1408 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1409 {
1410         struct inode *inode = d_inode(cfile->dentry);
1411         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1412         struct file_lock *flock;
1413         struct file_lock_context *flctx = inode->i_flctx;
1414         unsigned int count = 0, i;
1415         int rc = 0, xid, type;
1416         struct list_head locks_to_send, *el;
1417         struct lock_to_push *lck, *tmp;
1418         __u64 length;
1419
1420         xid = get_xid();
1421
1422         if (!flctx)
1423                 goto out;
1424
1425         spin_lock(&flctx->flc_lock);
1426         list_for_each(el, &flctx->flc_posix) {
1427                 count++;
1428         }
1429         spin_unlock(&flctx->flc_lock);
1430
1431         INIT_LIST_HEAD(&locks_to_send);
1432
1433         /*
1434          * Allocating count locks is enough because no FL_POSIX locks can be
1435          * added to the list while we hold cinode->lock_sem, which
1436          * protects locking operations on this inode.
1437          */
1438         for (i = 0; i < count; i++) {
1439                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1440                 if (!lck) {
1441                         rc = -ENOMEM;
1442                         goto err_out;
1443                 }
1444                 list_add_tail(&lck->llist, &locks_to_send);
1445         }
1446
1447         el = locks_to_send.next;
1448         spin_lock(&flctx->flc_lock);
1449         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1450                 if (el == &locks_to_send) {
1451                         /*
1452                          * The list ended. We don't have enough allocated
1453                          * structures - something is really wrong.
1454                          */
1455                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1456                         break;
1457                 }
1458                 length = cifs_flock_len(flock);
1459                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1460                         type = CIFS_RDLCK;
1461                 else
1462                         type = CIFS_WRLCK;
1463                 lck = list_entry(el, struct lock_to_push, llist);
1464                 lck->pid = hash_lockowner(flock->fl_owner);
1465                 lck->netfid = cfile->fid.netfid;
1466                 lck->length = length;
1467                 lck->type = type;
1468                 lck->offset = flock->fl_start;
1469         }
1470         spin_unlock(&flctx->flc_lock);
1471
1472         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1473                 int stored_rc;
1474
1475                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1476                                              lck->offset, lck->length, NULL,
1477                                              lck->type, 0);
1478                 if (stored_rc)
1479                         rc = stored_rc;
1480                 list_del(&lck->llist);
1481                 kfree(lck);
1482         }
1483
1484 out:
1485         free_xid(xid);
1486         return rc;
1487 err_out:
1488         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1489                 list_del(&lck->llist);
1490                 kfree(lck);
1491         }
1492         goto out;
1493 }
1494 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1495
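/*
 * Push cached byte-range locks to the server: POSIX locks when the
 * server advertises CIFS_UNIX_FCNTL_CAP (and POSIX brlocks are not
 * disabled on the mount), mandatory locks otherwise. Clears
 * can_cache_brlcks so that subsequent lock requests go straight to the
 * server.
 */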
1496 static int
1497 cifs_push_locks(struct cifsFileInfo *cfile)
1498 {
1499         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1500         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1501         int rc = 0;
1502 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1503         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1504 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1505
1506         /* we are going to update can_cache_brlcks here - need write access */
1507         cifs_down_write(&cinode->lock_sem);
1508         if (!cinode->can_cache_brlcks) {
1509                 up_write(&cinode->lock_sem);
1510                 return rc;
1511         }
1512
1513 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1514         if (cap_unix(tcon->ses) &&
1515             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1516             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1517                 rc = cifs_push_posix_locks(cfile);
1518         else
1519 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1520                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1521
1522         cinode->can_cache_brlcks = false;
1523         up_write(&cinode->lock_sem);
1524         return rc;
1525 }
1526
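/*
 * Decode a VFS file_lock into the lock type used on the wire and report
 * through *lock/*unlock whether this is a lock or an unlock request;
 * *wait_flag is set for blocking (FL_SLEEP) requests.
 */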
1527 static void
1528 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1529                 bool *wait_flag, struct TCP_Server_Info *server)
1530 {
1531         if (flock->fl_flags & FL_POSIX)
1532                 cifs_dbg(FYI, "Posix\n");
1533         if (flock->fl_flags & FL_FLOCK)
1534                 cifs_dbg(FYI, "Flock\n");
1535         if (flock->fl_flags & FL_SLEEP) {
1536                 cifs_dbg(FYI, "Blocking lock\n");
1537                 *wait_flag = true;
1538         }
1539         if (flock->fl_flags & FL_ACCESS)
1540                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1541         if (flock->fl_flags & FL_LEASE)
1542                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1543         if (flock->fl_flags &
1544             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1545                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1546                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1547
1548         *type = server->vals->large_lock_type;
1549         if (flock->fl_type == F_WRLCK) {
1550                 cifs_dbg(FYI, "F_WRLCK\n");
1551                 *type |= server->vals->exclusive_lock_type;
1552                 *lock = 1;
1553         } else if (flock->fl_type == F_UNLCK) {
1554                 cifs_dbg(FYI, "F_UNLCK\n");
1555                 *type |= server->vals->unlock_lock_type;
1556                 *unlock = 1;
1557                 /* Check if unlock includes more than one lock range */
1558         } else if (flock->fl_type == F_RDLCK) {
1559                 cifs_dbg(FYI, "F_RDLCK\n");
1560                 *type |= server->vals->shared_lock_type;
1561                 *lock = 1;
1562         } else if (flock->fl_type == F_EXLCK) {
1563                 cifs_dbg(FYI, "F_EXLCK\n");
1564                 *type |= server->vals->exclusive_lock_type;
1565                 *lock = 1;
1566         } else if (flock->fl_type == F_SHLCK) {
1567                 cifs_dbg(FYI, "F_SHLCK\n");
1568                 *type |= server->vals->shared_lock_type;
1569                 *lock = 1;
1570         } else
1571                 cifs_dbg(FYI, "Unknown type of lock\n");
1572 }
1573
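/*
 * Handle a lock test (F_GETLK): check the cached locks first, then
 * probe the server by briefly taking and releasing the lock. The result
 * is reported in flock->fl_type (F_UNLCK if the range is free).
 */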
1574 static int
1575 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1576            bool wait_flag, bool posix_lck, unsigned int xid)
1577 {
1578         int rc = 0;
1579         __u64 length = cifs_flock_len(flock);
1580         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1581         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1582         struct TCP_Server_Info *server = tcon->ses->server;
1583 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1584         __u16 netfid = cfile->fid.netfid;
1585
1586         if (posix_lck) {
1587                 int posix_lock_type;
1588
1589                 rc = cifs_posix_lock_test(file, flock);
1590                 if (!rc)
1591                         return rc;
1592
1593                 if (type & server->vals->shared_lock_type)
1594                         posix_lock_type = CIFS_RDLCK;
1595                 else
1596                         posix_lock_type = CIFS_WRLCK;
1597                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1598                                       hash_lockowner(flock->fl_owner),
1599                                       flock->fl_start, length, flock,
1600                                       posix_lock_type, wait_flag);
1601                 return rc;
1602         }
1603 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1604
1605         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1606         if (!rc)
1607                 return rc;
1608
1609         /* BB we could chain these into one lock request BB */
1610         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1611                                     1, 0, false);
1612         if (rc == 0) {
1613                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1614                                             type, 0, 1, false);
1615                 flock->fl_type = F_UNLCK;
1616                 if (rc != 0)
1617                         cifs_dbg(VFS, "Error unlocking previously locked range during lock test, rc=%d\n",
1618                                  rc);
1619                 return 0;
1620         }
1621
1622         if (type & server->vals->shared_lock_type) {
1623                 flock->fl_type = F_WRLCK;
1624                 return 0;
1625         }
1626
1627         type &= ~server->vals->exclusive_lock_type;
1628
1629         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1630                                     type | server->vals->shared_lock_type,
1631                                     1, 0, false);
1632         if (rc == 0) {
1633                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1634                         type | server->vals->shared_lock_type, 0, 1, false);
1635                 flock->fl_type = F_RDLCK;
1636                 if (rc != 0)
1637                         cifs_dbg(VFS, "Error unlocking previously locked range during lock test, rc=%d\n",
1638                                  rc);
1639         } else
1640                 flock->fl_type = F_WRLCK;
1641
1642         return 0;
1643 }
1644
1645 void
1646 cifs_move_llist(struct list_head *source, struct list_head *dest)
1647 {
1648         struct list_head *li, *tmp;
1649         list_for_each_safe(li, tmp, source)
1650                 list_move(li, dest);
1651 }
1652
1653 void
1654 cifs_free_llist(struct list_head *llist)
1655 {
1656         struct cifsLockInfo *li, *tmp;
1657         list_for_each_entry_safe(li, tmp, llist, llist) {
1658                 cifs_del_lock_waiters(li);
1659                 list_del(&li->llist);
1660                 kfree(li);
1661         }
1662 }
1663
1664 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
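/*
 * Unlock a byte range: walk the file's cached lock list, batch matching
 * ranges into a LOCKING_ANDX_RANGE array (one pass per lock type) and
 * send them with cifs_lockv. Locks are parked on tmp_llist so they can
 * be restored to the file's list if the server rejects the unlock.
 */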
1665 int
1666 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1667                   unsigned int xid)
1668 {
1669         int rc = 0, stored_rc;
1670         static const int types[] = {
1671                 LOCKING_ANDX_LARGE_FILES,
1672                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1673         };
1674         unsigned int i;
1675         unsigned int max_num, num, max_buf;
1676         LOCKING_ANDX_RANGE *buf, *cur;
1677         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1678         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1679         struct cifsLockInfo *li, *tmp;
1680         __u64 length = cifs_flock_len(flock);
1681         struct list_head tmp_llist;
1682
1683         INIT_LIST_HEAD(&tmp_llist);
1684
1685         /*
1686          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1687          * and check it before using.
1688          */
1689         max_buf = tcon->ses->server->maxBuf;
1690         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1691                 return -EINVAL;
1692
1693         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1694                      PAGE_SIZE);
1695         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1696                         PAGE_SIZE);
1697         max_num = (max_buf - sizeof(struct smb_hdr)) /
1698                                                 sizeof(LOCKING_ANDX_RANGE);
1699         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1700         if (!buf)
1701                 return -ENOMEM;
1702
1703         cifs_down_write(&cinode->lock_sem);
1704         for (i = 0; i < 2; i++) {
1705                 cur = buf;
1706                 num = 0;
1707                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1708                         if (flock->fl_start > li->offset ||
1709                             (flock->fl_start + length) <
1710                             (li->offset + li->length))
1711                                 continue;
1712                         if (current->tgid != li->pid)
1713                                 continue;
1714                         if (types[i] != li->type)
1715                                 continue;
1716                         if (cinode->can_cache_brlcks) {
1717                                 /*
1718                                  * We can cache brlock requests - simply remove
1719                                  * a lock from the file's list.
1720                                  */
1721                                 list_del(&li->llist);
1722                                 cifs_del_lock_waiters(li);
1723                                 kfree(li);
1724                                 continue;
1725                         }
1726                         cur->Pid = cpu_to_le16(li->pid);
1727                         cur->LengthLow = cpu_to_le32((u32)li->length);
1728                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1729                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1730                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1731                         /*
1732                          * We need to save a lock here to let us add it again to
1733                          * the file's list if the unlock range request fails on
1734                          * the server.
1735                          */
1736                         list_move(&li->llist, &tmp_llist);
1737                         if (++num == max_num) {
1738                                 stored_rc = cifs_lockv(xid, tcon,
1739                                                        cfile->fid.netfid,
1740                                                        li->type, num, 0, buf);
1741                                 if (stored_rc) {
1742                                         /*
1743                                          * We failed on the unlock range
1744                                          * request - add all locks from the tmp
1745                                          * list to the head of the file's list.
1746                                          */
1747                                         cifs_move_llist(&tmp_llist,
1748                                                         &cfile->llist->locks);
1749                                         rc = stored_rc;
1750                                 } else
1751                                         /*
1752                                          * The unlock range request succeeded -
1753                                          * free the tmp list.
1754                                          */
1755                                         cifs_free_llist(&tmp_llist);
1756                                 cur = buf;
1757                                 num = 0;
1758                         } else
1759                                 cur++;
1760                 }
1761                 if (num) {
1762                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1763                                                types[i], num, 0, buf);
1764                         if (stored_rc) {
1765                                 cifs_move_llist(&tmp_llist,
1766                                                 &cfile->llist->locks);
1767                                 rc = stored_rc;
1768                         } else
1769                                 cifs_free_llist(&tmp_llist);
1770                 }
1771         }
1772
1773         up_write(&cinode->lock_sem);
1774         kfree(buf);
1775         return rc;
1776 }
1777 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1778
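/*
 * Handle a lock set request (F_SETLK/F_SETLKW and flock): use the POSIX
 * path when available; otherwise record the lock locally with
 * cifs_lock_add_if and send a mandatory lock to the server. For
 * FL_POSIX/FL_FLOCK requests the local VFS lock state is updated with
 * locks_lock_file_wait on success.
 */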
1779 static int
1780 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1781            bool wait_flag, bool posix_lck, int lock, int unlock,
1782            unsigned int xid)
1783 {
1784         int rc = 0;
1785         __u64 length = cifs_flock_len(flock);
1786         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1787         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1788         struct TCP_Server_Info *server = tcon->ses->server;
1789         struct inode *inode = d_inode(cfile->dentry);
1790
1791 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1792         if (posix_lck) {
1793                 int posix_lock_type;
1794
1795                 rc = cifs_posix_lock_set(file, flock);
1796                 if (rc <= FILE_LOCK_DEFERRED)
1797                         return rc;
1798
1799                 if (type & server->vals->shared_lock_type)
1800                         posix_lock_type = CIFS_RDLCK;
1801                 else
1802                         posix_lock_type = CIFS_WRLCK;
1803
1804                 if (unlock == 1)
1805                         posix_lock_type = CIFS_UNLCK;
1806
1807                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1808                                       hash_lockowner(flock->fl_owner),
1809                                       flock->fl_start, length,
1810                                       NULL, posix_lock_type, wait_flag);
1811                 goto out;
1812         }
1813 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1814         if (lock) {
1815                 struct cifsLockInfo *lock;
1816
1817                 lock = cifs_lock_init(flock->fl_start, length, type,
1818                                       flock->fl_flags);
1819                 if (!lock)
1820                         return -ENOMEM;
1821
1822                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1823                 if (rc < 0) {
1824                         kfree(lock);
1825                         return rc;
1826                 }
1827                 if (!rc)
1828                         goto out;
1829
1830                 /*
1831                  * Windows 7 server can delay breaking lease from read to None
1832                  * if we set a byte-range lock on a file - break it explicitly
1833                  * before sending the lock to the server to be sure the next
1834                  * read won't conflict with non-overlapping locks due to
1835                  * page reading.
1836                  */
1837                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1838                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1839                         cifs_zap_mapping(inode);
1840                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1841                                  inode);
1842                         CIFS_I(inode)->oplock = 0;
1843                 }
1844
1845                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1846                                             type, 1, 0, wait_flag);
1847                 if (rc) {
1848                         kfree(lock);
1849                         return rc;
1850                 }
1851
1852                 cifs_lock_add(cfile, lock);
1853         } else if (unlock)
1854                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1855
1856 out:
1857         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1858                 /*
1859                  * If this is a request to remove all locks because we
1860                  * are closing the file, it doesn't matter if the
1861                  * unlocking failed as both cifs.ko and the SMB server
1862                  * remove the lock on file close
1863                  */
1864                 if (rc) {
1865                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1866                         if (!(flock->fl_flags & FL_CLOSE))
1867                                 return rc;
1868                 }
1869                 rc = locks_lock_file_wait(file, flock);
1870         }
1871         return rc;
1872 }
1873
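/* flock(2) entry point - only FL_FLOCK requests are accepted here */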
1874 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1875 {
1876         int rc, xid;
1877         int lock = 0, unlock = 0;
1878         bool wait_flag = false;
1879         bool posix_lck = false;
1880         struct cifs_sb_info *cifs_sb;
1881         struct cifs_tcon *tcon;
1882         struct cifsFileInfo *cfile;
1883         __u32 type;
1884
1885         xid = get_xid();
1886
1887         if (!(fl->fl_flags & FL_FLOCK)) {
1888                 rc = -ENOLCK;
1889                 free_xid(xid);
1890                 return rc;
1891         }
1892
1893         cfile = (struct cifsFileInfo *)file->private_data;
1894         tcon = tlink_tcon(cfile->tlink);
1895
1896         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1897                         tcon->ses->server);
1898         cifs_sb = CIFS_FILE_SB(file);
1899
1900         if (cap_unix(tcon->ses) &&
1901             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1902             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1903                 posix_lck = true;
1904
1905         if (!lock && !unlock) {
1906                 /*
1907                  * if no lock or unlock then nothing to do since we do not
1908                  * know what it is
1909                  */
1910                 rc = -EOPNOTSUPP;
1911                 free_xid(xid);
1912                 return rc;
1913         }
1914
1915         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1916                         xid);
1917         free_xid(xid);
1918         return rc;
1921 }
1922
1923 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1924 {
1925         int rc, xid;
1926         int lock = 0, unlock = 0;
1927         bool wait_flag = false;
1928         bool posix_lck = false;
1929         struct cifs_sb_info *cifs_sb;
1930         struct cifs_tcon *tcon;
1931         struct cifsFileInfo *cfile;
1932         __u32 type;
1933
1934         rc = -EACCES;
1935         xid = get_xid();
1936
1937         cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
1938                  flock->fl_type, flock->fl_flags, (long long)flock->fl_start,
1939                  (long long)flock->fl_end);
1940
1941         cfile = (struct cifsFileInfo *)file->private_data;
1942         tcon = tlink_tcon(cfile->tlink);
1943
1944         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1945                         tcon->ses->server);
1946         cifs_sb = CIFS_FILE_SB(file);
1947         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1948
1949         if (cap_unix(tcon->ses) &&
1950             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1951             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1952                 posix_lck = true;
1953         /*
1954          * BB add code here to normalize offset and length to account for
1955          * negative length which we can not accept over the wire.
1956          */
1957         if (IS_GETLK(cmd)) {
1958                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1959                 free_xid(xid);
1960                 return rc;
1961         }
1962
1963         if (!lock && !unlock) {
1964                 /*
1965                  * if no lock or unlock then nothing to do since we do not
1966                  * know what it is
1967                  */
1968                 free_xid(xid);
1969                 return -EOPNOTSUPP;
1970         }
1971
1972         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1973                         xid);
1974         free_xid(xid);
1975         return rc;
1976 }
1977
1978 /*
1979  * update the file size (if needed) after a write. Should be called with
1980  * the inode->i_lock held
1981  */
1982 void
1983 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1984                       unsigned int bytes_written)
1985 {
1986         loff_t end_of_write = offset + bytes_written;
1987
1988         if (end_of_write > cifsi->server_eof)
1989                 cifsi->server_eof = end_of_write;
1990 }
1991
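/*
 * Synchronous write helper: loops over the buffer in chunks no larger
 * than wp_retry_size(), reopening the handle and retrying on -EAGAIN,
 * and updates the cached EOF and i_size as bytes are written.
 */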
1992 static ssize_t
1993 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1994            size_t write_size, loff_t *offset)
1995 {
1996         int rc = 0;
1997         unsigned int bytes_written = 0;
1998         unsigned int total_written;
1999         struct cifs_tcon *tcon;
2000         struct TCP_Server_Info *server;
2001         unsigned int xid;
2002         struct dentry *dentry = open_file->dentry;
2003         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2004         struct cifs_io_parms io_parms = {0};
2005
2006         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2007                  write_size, *offset, dentry);
2008
2009         tcon = tlink_tcon(open_file->tlink);
2010         server = tcon->ses->server;
2011
2012         if (!server->ops->sync_write)
2013                 return -ENOSYS;
2014
2015         xid = get_xid();
2016
2017         for (total_written = 0; write_size > total_written;
2018              total_written += bytes_written) {
2019                 rc = -EAGAIN;
2020                 while (rc == -EAGAIN) {
2021                         struct kvec iov[2];
2022                         unsigned int len;
2023
2024                         if (open_file->invalidHandle) {
2025                                 /* we could deadlock if we called
2026                                    filemap_fdatawait from here so tell
2027                                    reopen_file not to flush data to
2028                                    server now */
2029                                 rc = cifs_reopen_file(open_file, false);
2030                                 if (rc != 0)
2031                                         break;
2032                         }
2033
2034                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2035                                   (unsigned int)write_size - total_written);
2036                         /* iov[0] is reserved for smb header */
2037                         iov[1].iov_base = (char *)write_data + total_written;
2038                         iov[1].iov_len = len;
2039                         io_parms.pid = pid;
2040                         io_parms.tcon = tcon;
2041                         io_parms.offset = *offset;
2042                         io_parms.length = len;
2043                         rc = server->ops->sync_write(xid, &open_file->fid,
2044                                         &io_parms, &bytes_written, iov, 1);
2045                 }
2046                 if (rc || (bytes_written == 0)) {
2047                         if (total_written)
2048                                 break;
2049                         else {
2050                                 free_xid(xid);
2051                                 return rc;
2052                         }
2053                 } else {
2054                         spin_lock(&d_inode(dentry)->i_lock);
2055                         cifs_update_eof(cifsi, *offset, bytes_written);
2056                         spin_unlock(&d_inode(dentry)->i_lock);
2057                         *offset += bytes_written;
2058                 }
2059         }
2060
2061         cifs_stats_bytes_written(tcon, total_written);
2062
2063         if (total_written > 0) {
2064                 spin_lock(&d_inode(dentry)->i_lock);
2065                 if (*offset > d_inode(dentry)->i_size) {
2066                         i_size_write(d_inode(dentry), *offset);
2067                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2068                 }
2069                 spin_unlock(&d_inode(dentry)->i_lock);
2070         }
2071         mark_inode_dirty_sync(d_inode(dentry));
2072         free_xid(xid);
2073         return total_written;
2074 }
2075
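/*
 * Find an open handle on this inode that was opened for reading and is
 * not marked invalid; a reference is taken on the returned handle.
 */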
2076 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2077                                         bool fsuid_only)
2078 {
2079         struct cifsFileInfo *open_file = NULL;
2080         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2081
2082         /* only filter by fsuid on multiuser mounts */
2083         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2084                 fsuid_only = false;
2085
2086         spin_lock(&cifs_inode->open_file_lock);
2087         /* we could simply take the first list entry, since write-only
2088            entries are always at the end of the list; but the first entry
2089            might have a close pending, so we walk the whole list */
2090         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2091                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2092                         continue;
2093                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2094                         if (!open_file->invalidHandle) {
2095                                 /* found a good file */
2096                                 /* lock it so it will not be closed on us */
2097                                 cifsFileInfo_get(open_file);
2098                                 spin_unlock(&cifs_inode->open_file_lock);
2099                                 return open_file;
2100                         } /* else might as well continue, and look for
2101                              another, or simply have the caller reopen it
2102                              again rather than trying to fix this handle */
2103                 } else /* write only file */
2104                         break; /* write only files are last so must be done */
2105         }
2106         spin_unlock(&cifs_inode->open_file_lock);
2107         return NULL;
2108 }
2109
2110 /* Return -EBADF if no handle is found and general rc otherwise */
2111 int
2112 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2113                        struct cifsFileInfo **ret_file)
2114 {
2115         struct cifsFileInfo *open_file, *inv_file = NULL;
2116         struct cifs_sb_info *cifs_sb;
2117         bool any_available = false;
2118         int rc = -EBADF;
2119         unsigned int refind = 0;
2120         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2121         bool with_delete = flags & FIND_WR_WITH_DELETE;
2122         *ret_file = NULL;
2123
2124         /*
2125          * Having a null inode here (because mapping->host was set to zero by
2126          * the VFS or MM) should not happen but we had reports of an oops (due
2127          * to it being zero) during stress test cases so we need to check for it
2128          */
2129
2130         if (cifs_inode == NULL) {
2131                 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2132                 dump_stack();
2133                 return rc;
2134         }
2135
2136         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2137
2138         /* only filter by fsuid on multiuser mounts */
2139         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2140                 fsuid_only = false;
2141
2142         spin_lock(&cifs_inode->open_file_lock);
2143 refind_writable:
2144         if (refind > MAX_REOPEN_ATT) {
2145                 spin_unlock(&cifs_inode->open_file_lock);
2146                 return rc;
2147         }
2148         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2149                 if (!any_available && open_file->pid != current->tgid)
2150                         continue;
2151                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2152                         continue;
2153                 if (with_delete && !(open_file->fid.access & DELETE))
2154                         continue;
2155                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2156                         if (!open_file->invalidHandle) {
2157                                 /* found a good writable file */
2158                                 cifsFileInfo_get(open_file);
2159                                 spin_unlock(&cifs_inode->open_file_lock);
2160                                 *ret_file = open_file;
2161                                 return 0;
2162                         } else {
2163                                 if (!inv_file)
2164                                         inv_file = open_file;
2165                         }
2166                 }
2167         }
2168         /* couldn't find usable FH with same pid, try any available */
2169         if (!any_available) {
2170                 any_available = true;
2171                 goto refind_writable;
2172         }
2173
2174         if (inv_file) {
2175                 any_available = false;
2176                 cifsFileInfo_get(inv_file);
2177         }
2178
2179         spin_unlock(&cifs_inode->open_file_lock);
2180
2181         if (inv_file) {
2182                 rc = cifs_reopen_file(inv_file, false);
2183                 if (!rc) {
2184                         *ret_file = inv_file;
2185                         return 0;
2186                 }
2187
2188                 spin_lock(&cifs_inode->open_file_lock);
2189                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2190                 spin_unlock(&cifs_inode->open_file_lock);
2191                 cifsFileInfo_put(inv_file);
2192                 ++refind;
2193                 inv_file = NULL;
2194                 spin_lock(&cifs_inode->open_file_lock);
2195                 goto refind_writable;
2196         }
2197
2198         return rc;
2199 }
2200
2201 struct cifsFileInfo *
2202 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2203 {
2204         struct cifsFileInfo *cfile;
2205         int rc;
2206
2207         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2208         if (rc)
2209                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2210
2211         return cfile;
2212 }
2213
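/*
 * Look up an already-open file on this tcon by full path and return a
 * writable handle for its inode, or -ENOENT if no match is found.
 */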
2214 int
2215 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2216                        int flags,
2217                        struct cifsFileInfo **ret_file)
2218 {
2219         struct cifsFileInfo *cfile;
2220         void *page = alloc_dentry_path();
2221
2222         *ret_file = NULL;
2223
2224         spin_lock(&tcon->open_file_lock);
2225         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2226                 struct cifsInodeInfo *cinode;
2227                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2228                 if (IS_ERR(full_path)) {
2229                         spin_unlock(&tcon->open_file_lock);
2230                         free_dentry_path(page);
2231                         return PTR_ERR(full_path);
2232                 }
2233                 if (strcmp(full_path, name))
2234                         continue;
2235
2236                 cinode = CIFS_I(d_inode(cfile->dentry));
2237                 spin_unlock(&tcon->open_file_lock);
2238                 free_dentry_path(page);
2239                 return cifs_get_writable_file(cinode, flags, ret_file);
2240         }
2241
2242         spin_unlock(&tcon->open_file_lock);
2243         free_dentry_path(page);
2244         return -ENOENT;
2245 }
2246
2247 int
2248 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2249                        struct cifsFileInfo **ret_file)
2250 {
2251         struct cifsFileInfo *cfile;
2252         void *page = alloc_dentry_path();
2253
2254         *ret_file = NULL;
2255
2256         spin_lock(&tcon->open_file_lock);
2257         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2258                 struct cifsInodeInfo *cinode;
2259                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2260                 if (IS_ERR(full_path)) {
2261                         spin_unlock(&tcon->open_file_lock);
2262                         free_dentry_path(page);
2263                         return PTR_ERR(full_path);
2264                 }
2265                 if (strcmp(full_path, name))
2266                         continue;
2267
2268                 cinode = CIFS_I(d_inode(cfile->dentry));
2269                 spin_unlock(&tcon->open_file_lock);
2270                 free_dentry_path(page);
2271                 *ret_file = find_readable_file(cinode, 0);
2272                 return *ret_file ? 0 : -ENOENT;
2273         }
2274
2275         spin_unlock(&tcon->open_file_lock);
2276         free_dentry_path(page);
2277         return -ENOENT;
2278 }
2279
2280 void
2281 cifs_writedata_release(struct kref *refcount)
2282 {
2283         struct cifs_writedata *wdata = container_of(refcount,
2284                                         struct cifs_writedata, refcount);
2285 #ifdef CONFIG_CIFS_SMB_DIRECT
2286         if (wdata->mr) {
2287                 smbd_deregister_mr(wdata->mr);
2288                 wdata->mr = NULL;
2289         }
2290 #endif
2291
2292         if (wdata->cfile)
2293                 cifsFileInfo_put(wdata->cfile);
2294
2295         kvfree(wdata->pages);
2296         kfree(wdata);
2297 }
2298
2299 /*
2300  * Write failed with a retryable error. Resend the write request. It's also
2301  * possible that the page was redirtied so re-clean the page.
2302  */
2303 static void
2304 cifs_writev_requeue(struct cifs_writedata *wdata)
2305 {
2306         int i, rc = 0;
2307         struct inode *inode = d_inode(wdata->cfile->dentry);
2308         struct TCP_Server_Info *server;
2309         unsigned int rest_len;
2310
2311         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2312         i = 0;
2313         rest_len = wdata->bytes;
2314         do {
2315                 struct cifs_writedata *wdata2;
2316                 unsigned int j, nr_pages, wsize, tailsz, cur_len;
2317
2318                 wsize = server->ops->wp_retry_size(inode);
2319                 if (wsize < rest_len) {
2320                         nr_pages = wsize / PAGE_SIZE;
2321                         if (!nr_pages) {
2322                                 rc = -EOPNOTSUPP;
2323                                 break;
2324                         }
2325                         cur_len = nr_pages * PAGE_SIZE;
2326                         tailsz = PAGE_SIZE;
2327                 } else {
2328                         nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE);
2329                         cur_len = rest_len;
2330                         tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE;
2331                 }
2332
2333                 wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
2334                 if (!wdata2) {
2335                         rc = -ENOMEM;
2336                         break;
2337                 }
2338
2339                 for (j = 0; j < nr_pages; j++) {
2340                         wdata2->pages[j] = wdata->pages[i + j];
2341                         lock_page(wdata2->pages[j]);
2342                         clear_page_dirty_for_io(wdata2->pages[j]);
2343                 }
2344
2345                 wdata2->sync_mode = wdata->sync_mode;
2346                 wdata2->nr_pages = nr_pages;
2347                 wdata2->offset = page_offset(wdata2->pages[0]);
2348                 wdata2->pagesz = PAGE_SIZE;
2349                 wdata2->tailsz = tailsz;
2350                 wdata2->bytes = cur_len;
2351
2352                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2353                                             &wdata2->cfile);
2354                 if (!wdata2->cfile) {
2355                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2356                                  rc);
2357                         if (!is_retryable_error(rc))
2358                                 rc = -EBADF;
2359                 } else {
2360                         wdata2->pid = wdata2->cfile->pid;
2361                         rc = server->ops->async_writev(wdata2,
2362                                                        cifs_writedata_release);
2363                 }
2364
2365                 for (j = 0; j < nr_pages; j++) {
2366                         unlock_page(wdata2->pages[j]);
2367                         if (rc != 0 && !is_retryable_error(rc)) {
2368                                 SetPageError(wdata2->pages[j]);
2369                                 end_page_writeback(wdata2->pages[j]);
2370                                 put_page(wdata2->pages[j]);
2371                         }
2372                 }
2373
2374                 kref_put(&wdata2->refcount, cifs_writedata_release);
2375                 if (rc) {
2376                         if (is_retryable_error(rc))
2377                                 continue;
2378                         i += nr_pages;
2379                         break;
2380                 }
2381
2382                 rest_len -= cur_len;
2383                 i += nr_pages;
2384         } while (i < wdata->nr_pages);
2385
2386         /* cleanup remaining pages from the original wdata */
2387         for (; i < wdata->nr_pages; i++) {
2388                 SetPageError(wdata->pages[i]);
2389                 end_page_writeback(wdata->pages[i]);
2390                 put_page(wdata->pages[i]);
2391         }
2392
2393         if (rc != 0 && !is_retryable_error(rc))
2394                 mapping_set_error(inode->i_mapping, rc);
2395         kref_put(&wdata->refcount, cifs_writedata_release);
2396 }
2397
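/*
 * Completion work for an async write: on success update the cached EOF
 * and the write statistics; on -EAGAIN under WB_SYNC_ALL requeue the
 * request, otherwise end writeback on each page, redirtying it or
 * marking the error as appropriate.
 */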
2398 void
2399 cifs_writev_complete(struct work_struct *work)
2400 {
2401         struct cifs_writedata *wdata = container_of(work,
2402                                                 struct cifs_writedata, work);
2403         struct inode *inode = d_inode(wdata->cfile->dentry);
2404         int i = 0;
2405
2406         if (wdata->result == 0) {
2407                 spin_lock(&inode->i_lock);
2408                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2409                 spin_unlock(&inode->i_lock);
2410                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2411                                          wdata->bytes);
2412         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2413                 return cifs_writev_requeue(wdata);
2414
2415         for (i = 0; i < wdata->nr_pages; i++) {
2416                 struct page *page = wdata->pages[i];
2417
2418                 if (wdata->result == -EAGAIN)
2419                         __set_page_dirty_nobuffers(page);
2420                 else if (wdata->result < 0)
2421                         SetPageError(page);
2422                 end_page_writeback(page);
2423                 cifs_readpage_to_fscache(inode, page);
2424                 put_page(page);
2425         }
2426         if (wdata->result != -EAGAIN)
2427                 mapping_set_error(inode->i_mapping, wdata->result);
2428         kref_put(&wdata->refcount, cifs_writedata_release);
2429 }
2430
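/*
 * Allocate a cifs_writedata with room for nr_pages page pointers; the
 * completion work item runs the given work function.
 */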
2431 struct cifs_writedata *
2432 cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
2433 {
2434         struct cifs_writedata *writedata = NULL;
2435         struct page **pages =
2436                 kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
2437         if (pages) {
2438                 writedata = cifs_writedata_direct_alloc(pages, complete);
2439                 if (!writedata)
2440                         kvfree(pages);
2441         }
2442
2443         return writedata;
2444 }
2445
2446 struct cifs_writedata *
2447 cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
2448 {
2449         struct cifs_writedata *wdata;
2450
2451         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2452         if (wdata != NULL) {
2453                 wdata->pages = pages;
2454                 kref_init(&wdata->refcount);
2455                 INIT_LIST_HEAD(&wdata->list);
2456                 init_completion(&wdata->done);
2457                 INIT_WORK(&wdata->work, complete);
2458         }
2459         return wdata;
2460 }
2461
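/*
 * Write the byte range [from, to) of a locked page back to the server
 * using any writable handle on the inode; used by cifs_writepage_locked.
 */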
2463 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2464 {
2465         struct address_space *mapping = page->mapping;
2466         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2467         char *write_data;
2468         int rc = -EFAULT;
2469         int bytes_written = 0;
2470         struct inode *inode;
2471         struct cifsFileInfo *open_file;
2472
2473         if (!mapping || !mapping->host)
2474                 return -EFAULT;
2475
2476         inode = page->mapping->host;
2477
2478         offset += (loff_t)from;
2479         write_data = kmap(page);
2480         write_data += from;
2481
2482         if ((to > PAGE_SIZE) || (from > to)) {
2483                 kunmap(page);
2484                 return -EIO;
2485         }
2486
2487         /* racing with truncate? */
2488         if (offset > mapping->host->i_size) {
2489                 kunmap(page);
2490                 return 0; /* don't care */
2491         }
2492
2493         /* check to make sure that we are not extending the file */
2494         if (mapping->host->i_size - offset < (loff_t)to)
2495                 to = (unsigned)(mapping->host->i_size - offset);
2496
2497         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2498                                     &open_file);
2499         if (!rc) {
2500                 bytes_written = cifs_write(open_file, open_file->pid,
2501                                            write_data, to - from, &offset);
2502                 cifsFileInfo_put(open_file);
2503                 /* Does mm or vfs already set times? */
2504                 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
2505                 if ((bytes_written > 0) && (offset))
2506                         rc = 0;
2507                 else if (bytes_written < 0)
2508                         rc = bytes_written;
2509                 else
2510                         rc = -EFAULT;
2511         } else {
2512                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2513                 if (!is_retryable_error(rc))
2514                         rc = -EIO;
2515         }
2516
2517         kunmap(page);
2518         return rc;
2519 }
2520
2521 static struct cifs_writedata *
2522 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2523                           pgoff_t end, pgoff_t *index,
2524                           unsigned int *found_pages)
2525 {
2526         struct cifs_writedata *wdata;
2527
2528         wdata = cifs_writedata_alloc((unsigned int)tofind,
2529                                      cifs_writev_complete);
2530         if (!wdata)
2531                 return NULL;
2532
2533         *found_pages = find_get_pages_range_tag(mapping, index, end,
2534                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2535         return wdata;
2536 }
2537
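/*
 * Lock and tag for writeback as many consecutive dirty pages as
 * possible from the found_pages set; pages that cannot be used are
 * released. Returns the number of pages queued in wdata->pages.
 */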
2538 static unsigned int
2539 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2540                     struct address_space *mapping,
2541                     struct writeback_control *wbc,
2542                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2543 {
2544         unsigned int nr_pages = 0, i;
2545         struct page *page;
2546
2547         for (i = 0; i < found_pages; i++) {
2548                 page = wdata->pages[i];
2549                 /*
2550                  * At this point we hold neither the i_pages lock nor the
2551                  * page lock: the page may be truncated or invalidated
2552                  * (changing page->mapping to NULL), or even swizzled
2553                  * back from swapper_space to tmpfs file mapping
2554                  */
2555
2556                 if (nr_pages == 0)
2557                         lock_page(page);
2558                 else if (!trylock_page(page))
2559                         break;
2560
2561                 if (unlikely(page->mapping != mapping)) {
2562                         unlock_page(page);
2563                         break;
2564                 }
2565
2566                 if (!wbc->range_cyclic && page->index > end) {
2567                         *done = true;
2568                         unlock_page(page);
2569                         break;
2570                 }
2571
2572                 if (*next && (page->index != *next)) {
2573                         /* Not next consecutive page */
2574                         unlock_page(page);
2575                         break;
2576                 }
2577
2578                 if (wbc->sync_mode != WB_SYNC_NONE)
2579                         wait_on_page_writeback(page);
2580
2581                 if (PageWriteback(page) ||
2582                                 !clear_page_dirty_for_io(page)) {
2583                         unlock_page(page);
2584                         break;
2585                 }
2586
2587                 /*
2588                  * This actually clears the dirty bit in the radix tree.
2589                  * See cifs_writepage() for more commentary.
2590                  */
2591                 set_page_writeback(page);
2592                 if (page_offset(page) >= i_size_read(mapping->host)) {
2593                         *done = true;
2594                         unlock_page(page);
2595                         end_page_writeback(page);
2596                         break;
2597                 }
2598
2599                 wdata->pages[i] = page;
2600                 *next = page->index + 1;
2601                 ++nr_pages;
2602         }
2603
2604         /* reset index to refind any pages skipped */
2605         if (nr_pages == 0)
2606                 *index = wdata->pages[0]->index + 1;
2607
2608         /* put any pages we aren't going to use */
2609         for (i = nr_pages; i < found_pages; i++) {
2610                 put_page(wdata->pages[i]);
2611                 wdata->pages[i] = NULL;
2612         }
2613
2614         return nr_pages;
2615 }
2616
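/*
 * Fill in the remaining wdata fields and hand the request to the
 * transport's async_writev, after adjusting credits to the final
 * request size.
 */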
2617 static int
2618 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2619                  struct address_space *mapping, struct writeback_control *wbc)
2620 {
2621         int rc;
2622
2623         wdata->sync_mode = wbc->sync_mode;
2624         wdata->nr_pages = nr_pages;
2625         wdata->offset = page_offset(wdata->pages[0]);
2626         wdata->pagesz = PAGE_SIZE;
2627         wdata->tailsz = min(i_size_read(mapping->host) -
2628                         page_offset(wdata->pages[nr_pages - 1]),
2629                         (loff_t)PAGE_SIZE);
2630         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2631         wdata->pid = wdata->cfile->pid;
2632
2633         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2634         if (rc)
2635                 return rc;
2636
2637         if (wdata->cfile->invalidHandle)
2638                 rc = -EAGAIN;
2639         else
2640                 rc = wdata->server->ops->async_writev(wdata,
2641                                                       cifs_writedata_release);
2642
2643         return rc;
2644 }
2645
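/*
 * Writeback for a whole mapping: repeatedly gather runs of dirty pages
 * (bounded by the negotiated wsize and available credits) and send them
 * with async_writev, retrying from the saved index on -EAGAIN when
 * wbc->sync_mode is WB_SYNC_ALL.
 */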
2646 static int cifs_writepages(struct address_space *mapping,
2647                            struct writeback_control *wbc)
2648 {
2649         struct inode *inode = mapping->host;
2650         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2651         struct TCP_Server_Info *server;
2652         bool done = false, scanned = false, range_whole = false;
2653         pgoff_t end, index;
2654         struct cifs_writedata *wdata;
2655         struct cifsFileInfo *cfile = NULL;
2656         int rc = 0;
2657         int saved_rc = 0;
2658         unsigned int xid;
2659
2660         /*
2661          * If wsize is smaller than the page cache size, default to writing
2662          * one page at a time via cifs_writepage
2663          */
2664         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2665                 return generic_writepages(mapping, wbc);
2666
2667         xid = get_xid();
2668         if (wbc->range_cyclic) {
2669                 index = mapping->writeback_index; /* Start from prev offset */
2670                 end = -1;
2671         } else {
2672                 index = wbc->range_start >> PAGE_SHIFT;
2673                 end = wbc->range_end >> PAGE_SHIFT;
2674                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2675                         range_whole = true;
2676                 scanned = true;
2677         }
2678         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2679
2680 retry:
2681         while (!done && index <= end) {
2682                 unsigned int i, nr_pages, found_pages, wsize;
2683                 pgoff_t next = 0, tofind, saved_index = index;
2684                 struct cifs_credits credits_on_stack;
2685                 struct cifs_credits *credits = &credits_on_stack;
2686                 int get_file_rc = 0;
2687
2688                 if (cfile)
2689                         cifsFileInfo_put(cfile);
2690
2691                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2692
2693                 /* in case of an error store it to return later */
2694                 if (rc)
2695                         get_file_rc = rc;
2696
2697                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2698                                                    &wsize, credits);
2699                 if (rc != 0) {
2700                         done = true;
2701                         break;
2702                 }
2703
2704                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2705
2706                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2707                                                   &found_pages);
2708                 if (!wdata) {
2709                         rc = -ENOMEM;
2710                         done = true;
2711                         add_credits_and_wake_if(server, credits, 0);
2712                         break;
2713                 }
2714
2715                 if (found_pages == 0) {
2716                         kref_put(&wdata->refcount, cifs_writedata_release);
2717                         add_credits_and_wake_if(server, credits, 0);
2718                         break;
2719                 }
2720
2721                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2722                                                end, &index, &next, &done);
2723
2724                 /* nothing to write? */
2725                 if (nr_pages == 0) {
2726                         kref_put(&wdata->refcount, cifs_writedata_release);
2727                         add_credits_and_wake_if(server, credits, 0);
2728                         continue;
2729                 }
2730
2731                 wdata->credits = credits_on_stack;
2732                 wdata->cfile = cfile;
2733                 wdata->server = server;
2734                 cfile = NULL;
2735
2736                 if (!wdata->cfile) {
2737                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2738                                  get_file_rc);
2739                         if (is_retryable_error(get_file_rc))
2740                                 rc = get_file_rc;
2741                         else
2742                                 rc = -EBADF;
2743                 } else
2744                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2745
2746                 for (i = 0; i < nr_pages; ++i)
2747                         unlock_page(wdata->pages[i]);
2748
2749                 /* send failure -- clean up the mess */
2750                 if (rc != 0) {
2751                         add_credits_and_wake_if(server, &wdata->credits, 0);
2752                         for (i = 0; i < nr_pages; ++i) {
2753                                 if (is_retryable_error(rc))
2754                                         redirty_page_for_writepage(wbc,
2755                                                            wdata->pages[i]);
2756                                 else
2757                                         SetPageError(wdata->pages[i]);
2758                                 end_page_writeback(wdata->pages[i]);
2759                                 put_page(wdata->pages[i]);
2760                         }
2761                         if (!is_retryable_error(rc))
2762                                 mapping_set_error(mapping, rc);
2763                 }
2764                 kref_put(&wdata->refcount, cifs_writedata_release);
2765
2766                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2767                         index = saved_index;
2768                         continue;
2769                 }
2770
2771                 /* Return immediately if we received a signal during writing */
2772                 if (is_interrupt_error(rc)) {
2773                         done = true;
2774                         break;
2775                 }
2776
2777                 if (rc != 0 && saved_rc == 0)
2778                         saved_rc = rc;
2779
2780                 wbc->nr_to_write -= nr_pages;
2781                 if (wbc->nr_to_write <= 0)
2782                         done = true;
2783
2784                 index = next;
2785         }
2786
2787         if (!scanned && !done) {
2788                 /*
2789                  * We hit the last page and there is more work to be done: wrap
2790                  * back to the start of the file
2791                  */
2792                 scanned = true;
2793                 index = 0;
2794                 goto retry;
2795         }
2796
2797         if (saved_rc != 0)
2798                 rc = saved_rc;
2799
2800         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2801                 mapping->writeback_index = index;
2802
2803         if (cfile)
2804                 cifsFileInfo_put(cfile);
2805         free_xid(xid);
2806         /* Indicate that ctime and mtime need updating, as close is deferred */
2807         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2808         return rc;
2809 }
2810
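/*
 * Write out a single locked page. A retryable error is retried
 * indefinitely for WB_SYNC_ALL, otherwise the page is redirtied;
 * a hard error marks both the page and the mapping in error.
 * Writeback state is ended in all cases; the caller unlocks the page.
 */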
2811 static int
2812 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2813 {
2814         int rc;
2815         unsigned int xid;
2816
2817         xid = get_xid();
2818 /* BB add check for wbc flags */
2819         get_page(page);
2820         if (!PageUptodate(page))
2821                 cifs_dbg(FYI, "ppw - page not up to date\n");
2822
2823         /*
2824          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2825          *
2826          * A writepage() implementation always needs to do either this,
2827          * or re-dirty the page with "redirty_page_for_writepage()" in
2828          * the case of a failure.
2829          *
2830          * Just unlocking the page would leave the radix-tree tag bits
2831          * out of sync with the actual state of the page.
2832          */
2833         set_page_writeback(page);
2834 retry_write:
2835         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2836         if (is_retryable_error(rc)) {
2837                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2838                         goto retry_write;
2839                 redirty_page_for_writepage(wbc, page);
2840         } else if (rc != 0) {
2841                 SetPageError(page);
2842                 mapping_set_error(page->mapping, rc);
2843         } else {
2844                 SetPageUptodate(page);
2845         }
2846         end_page_writeback(page);
2847         put_page(page);
2848         free_xid(xid);
2849         return rc;
2850 }
2851
2852 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2853 {
2854         int rc = cifs_writepage_locked(page, wbc);
2855         unlock_page(page);
2856         return rc;
2857 }
2858
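/*
 * Called by the generic write path after data has been copied into the
 * page. If the page is not uptodate, push the copied range to the
 * server synchronously via cifs_write(); otherwise just dirty the page
 * and let writeback send it later. Extends i_size when the write goes
 * past EOF.
 */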
2859 static int cifs_write_end(struct file *file, struct address_space *mapping,
2860                         loff_t pos, unsigned len, unsigned copied,
2861                         struct page *page, void *fsdata)
2862 {
2863         int rc;
2864         struct inode *inode = mapping->host;
2865         struct cifsFileInfo *cfile = file->private_data;
2866         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2867         __u32 pid;
2868
2869         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2870                 pid = cfile->pid;
2871         else
2872                 pid = current->tgid;
2873
2874         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2875                  page, pos, copied);
2876
2877         if (PageChecked(page)) {
2878                 if (copied == len)
2879                         SetPageUptodate(page);
2880                 ClearPageChecked(page);
2881         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2882                 SetPageUptodate(page);
2883
2884         if (!PageUptodate(page)) {
2885                 char *page_data;
2886                 unsigned offset = pos & (PAGE_SIZE - 1);
2887                 unsigned int xid;
2888
2889                 xid = get_xid();
2890                 /* This is probably better than directly calling
2891                    partialpage_write, since here the file handle is
2892                    known, which we might as well leverage. */
2893                 /* BB check if anything else is missing from ppw,
2894                    such as updating the last write time */
2895                 page_data = kmap(page);
2896                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2897                 /* if (rc < 0) should we set writebehind rc? */
2898                 kunmap(page);
2899
2900                 free_xid(xid);
2901         } else {
2902                 rc = copied;
2903                 pos += copied;
2904                 set_page_dirty(page);
2905         }
2906
2907         if (rc > 0) {
2908                 spin_lock(&inode->i_lock);
2909                 if (pos > inode->i_size) {
2910                         i_size_write(inode, pos);
2911                         inode->i_blocks = (512 - 1 + pos) >> 9;
2912                 }
2913                 spin_unlock(&inode->i_lock);
2914         }
2915
2916         unlock_page(page);
2917         put_page(page);
2918         /* Indicate that ctime and mtime need updating, as close is deferred */
2919         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2920
2921         return rc;
2922 }
2923
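/*
 * Strict fsync: flush and wait on dirty pages, zap the mapping when
 * read caching is no longer granted (so stale cached data is dropped),
 * then ask the server to flush. If this handle is not open for write,
 * borrow any writable handle for the flush.
 */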
2924 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2925                       int datasync)
2926 {
2927         unsigned int xid;
2928         int rc = 0;
2929         struct cifs_tcon *tcon;
2930         struct TCP_Server_Info *server;
2931         struct cifsFileInfo *smbfile = file->private_data;
2932         struct inode *inode = file_inode(file);
2933         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2934
2935         rc = file_write_and_wait_range(file, start, end);
2936         if (rc) {
2937                 trace_cifs_fsync_err(inode->i_ino, rc);
2938                 return rc;
2939         }
2940
2941         xid = get_xid();
2942
2943         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2944                  file, datasync);
2945
2946         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2947                 rc = cifs_zap_mapping(inode);
2948                 if (rc) {
2949                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2950                         rc = 0; /* don't care about it in fsync */
2951                 }
2952         }
2953
2954         tcon = tlink_tcon(smbfile->tlink);
2955         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2956                 server = tcon->ses->server;
2957                 if (server->ops->flush == NULL) {
2958                         rc = -ENOSYS;
2959                         goto strict_fsync_exit;
2960                 }
2961
2962                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2963                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2964                         if (smbfile) {
2965                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2966                                 cifsFileInfo_put(smbfile);
2967                         } else
2968                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2969                 } else
2970                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2971         }
2972
2973 strict_fsync_exit:
2974         free_xid(xid);
2975         return rc;
2976 }
2977
2978 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2979 {
2980         unsigned int xid;
2981         int rc = 0;
2982         struct cifs_tcon *tcon;
2983         struct TCP_Server_Info *server;
2984         struct cifsFileInfo *smbfile = file->private_data;
2985         struct inode *inode = file_inode(file);
2986         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2987
2988         rc = file_write_and_wait_range(file, start, end);
2989         if (rc) {
2990                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2991                 return rc;
2992         }
2993
2994         xid = get_xid();
2995
2996         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2997                  file, datasync);
2998
2999         tcon = tlink_tcon(smbfile->tlink);
3000         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3001                 server = tcon->ses->server;
3002                 if (server->ops->flush == NULL) {
3003                         rc = -ENOSYS;
3004                         goto fsync_exit;
3005                 }
3006
3007                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3008                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3009                         if (smbfile) {
3010                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3011                                 cifsFileInfo_put(smbfile);
3012                         } else
3013                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3014                 } else
3015                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3016         }
3017
3018 fsync_exit:
3019         free_xid(xid);
3020         return rc;
3021 }
3022
3023 /*
3024  * As the file closes, flush all cached write data for this inode,
3025  * checking for write-behind errors.
3026  */
3027 int cifs_flush(struct file *file, fl_owner_t id)
3028 {
3029         struct inode *inode = file_inode(file);
3030         int rc = 0;
3031
3032         if (file->f_mode & FMODE_WRITE)
3033                 rc = filemap_write_and_wait(inode->i_mapping);
3034
3035         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3036         if (rc) {
3037                 /* get more nuanced writeback errors */
3038                 rc = filemap_check_wb_err(file->f_mapping, 0);
3039                 trace_cifs_flush_err(inode->i_ino, rc);
3040         }
3041         return rc;
3042 }
3043
3044 static int
3045 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
3046 {
3047         int rc = 0;
3048         unsigned long i;
3049
3050         for (i = 0; i < num_pages; i++) {
3051                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3052                 if (!pages[i]) {
3053                         /*
3054                          * save number of pages we have already allocated and
3055                          * return with ENOMEM error
3056                          */
3057                         num_pages = i;
3058                         rc = -ENOMEM;
3059                         break;
3060                 }
3061         }
3062
3063         if (rc) {
3064                 for (i = 0; i < num_pages; i++)
3065                         put_page(pages[i]);
3066         }
3067         return rc;
3068 }
3069
3070 static inline
3071 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
3072 {
3073         size_t num_pages;
3074         size_t clen;
3075
3076         clen = min_t(const size_t, len, wsize);
3077         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
3078
3079         if (cur_len)
3080                 *cur_len = clen;
3081
3082         return num_pages;
3083 }
3084
3085 static void
3086 cifs_uncached_writedata_release(struct kref *refcount)
3087 {
3088         int i;
3089         struct cifs_writedata *wdata = container_of(refcount,
3090                                         struct cifs_writedata, refcount);
3091
3092         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3093         for (i = 0; i < wdata->nr_pages; i++)
3094                 put_page(wdata->pages[i]);
3095         cifs_writedata_release(refcount);
3096 }
3097
3098 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3099
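/*
 * Completion work for an uncached write: advance the inode's cached
 * EOF and i_size over the range just written, wake any waiter on
 * wdata->done, and let the collector reap whatever is ready. The
 * final kref_put may drop the last reference to the aio context.
 */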
3100 static void
3101 cifs_uncached_writev_complete(struct work_struct *work)
3102 {
3103         struct cifs_writedata *wdata = container_of(work,
3104                                         struct cifs_writedata, work);
3105         struct inode *inode = d_inode(wdata->cfile->dentry);
3106         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3107
3108         spin_lock(&inode->i_lock);
3109         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3110         if (cifsi->server_eof > inode->i_size)
3111                 i_size_write(inode, cifsi->server_eof);
3112         spin_unlock(&inode->i_lock);
3113
3114         complete(&wdata->done);
3115         collect_uncached_write_data(wdata->ctx);
3116         /* the below call can possibly free the last ref to aio ctx */
3117         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3118 }
3119
3120 static int
3121 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
3122                       size_t *len, unsigned long *num_pages)
3123 {
3124         size_t save_len, copied, bytes, cur_len = *len;
3125         unsigned long i, nr_pages = *num_pages;
3126
3127         save_len = cur_len;
3128         for (i = 0; i < nr_pages; i++) {
3129                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
3130                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
3131                 cur_len -= copied;
3132                 /*
3133                  * If we didn't copy as much as we expected, then that
3134                  * may mean we trod into an unmapped area. Stop copying
3135                  * at that point. On the next pass through the big
3136                  * loop, we'll likely end up getting a zero-length
3137                  * write and bailing out of it.
3138                  */
3139                 if (copied < bytes)
3140                         break;
3141         }
3142         cur_len = save_len - cur_len;
3143         *len = cur_len;
3144
3145         /*
3146          * If we have no data to send, then that probably means that
3147          * the copy above failed altogether. That's most likely because
3148          * the address in the iovec was bogus. Return -EFAULT and let
3149          * the caller free anything we allocated and bail out.
3150          */
3151         if (!cur_len)
3152                 return -EFAULT;
3153
3154         /*
3155          * i + 1 now represents the number of pages we actually used in
3156          * the copy phase above.
3157          */
3158         *num_pages = i + 1;
3159         return 0;
3160 }
3161
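/*
 * Resend a wdata that previously failed with -EAGAIN. The handle is
 * reopened if it went invalid, and we wait until the server grants
 * enough credits to cover the whole wdata in a single request.
 */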
3162 static int
3163 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3164         struct cifs_aio_ctx *ctx)
3165 {
3166         unsigned int wsize;
3167         struct cifs_credits credits;
3168         int rc;
3169         struct TCP_Server_Info *server = wdata->server;
3170
3171         do {
3172                 if (wdata->cfile->invalidHandle) {
3173                         rc = cifs_reopen_file(wdata->cfile, false);
3174                         if (rc == -EAGAIN)
3175                                 continue;
3176                         else if (rc)
3177                                 break;
3178                 }
3179
3180
3181                 /*
3182                  * Wait for credits to resend this wdata.
3183                  * Note: we are attempting to resend the whole wdata,
3184                  * not in segments
3185                  */
3186                 do {
3187                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3188                                                 &wsize, &credits);
3189                         if (rc)
3190                                 goto fail;
3191
3192                         if (wsize < wdata->bytes) {
3193                                 add_credits_and_wake_if(server, &credits, 0);
3194                                 msleep(1000);
3195                         }
3196                 } while (wsize < wdata->bytes);
3197                 wdata->credits = credits;
3198
3199                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3200
3201                 if (!rc) {
3202                         if (wdata->cfile->invalidHandle)
3203                                 rc = -EAGAIN;
3204                         else {
3205 #ifdef CONFIG_CIFS_SMB_DIRECT
3206                                 if (wdata->mr) {
3207                                         wdata->mr->need_invalidate = true;
3208                                         smbd_deregister_mr(wdata->mr);
3209                                         wdata->mr = NULL;
3210                                 }
3211 #endif
3212                                 rc = server->ops->async_writev(wdata,
3213                                         cifs_uncached_writedata_release);
3214                         }
3215                 }
3216
3217                 /* If the write was successfully sent, we are done */
3218                 if (!rc) {
3219                         list_add_tail(&wdata->list, wdata_list);
3220                         return 0;
3221                 }
3222
3223                 /* Roll back credits and retry if needed */
3224                 add_credits_and_wake_if(server, &wdata->credits, 0);
3225         } while (rc == -EAGAIN);
3226
3227 fail:
3228         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3229         return rc;
3230 }
3231
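/*
 * Carve an uncached write into wsize-bounded wdata requests and send
 * each asynchronously. For direct I/O the user pages are pinned and
 * used as-is; otherwise the data is copied into freshly allocated
 * pages. Successfully sent requests are queued on wdata_list for
 * collect_uncached_write_data() to reap.
 */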
3232 static int
3233 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
3234                      struct cifsFileInfo *open_file,
3235                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3236                      struct cifs_aio_ctx *ctx)
3237 {
3238         int rc = 0;
3239         size_t cur_len;
3240         unsigned long nr_pages, num_pages, i;
3241         struct cifs_writedata *wdata;
3242         struct iov_iter saved_from = *from;
3243         loff_t saved_offset = offset;
3244         pid_t pid;
3245         struct TCP_Server_Info *server;
3246         struct page **pagevec;
3247         size_t start;
3248         unsigned int xid;
3249
3250         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3251                 pid = open_file->pid;
3252         else
3253                 pid = current->tgid;
3254
3255         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3256         xid = get_xid();
3257
3258         do {
3259                 unsigned int wsize;
3260                 struct cifs_credits credits_on_stack;
3261                 struct cifs_credits *credits = &credits_on_stack;
3262
3263                 if (open_file->invalidHandle) {
3264                         rc = cifs_reopen_file(open_file, false);
3265                         if (rc == -EAGAIN)
3266                                 continue;
3267                         else if (rc)
3268                                 break;
3269                 }
3270
3271                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3272                                                    &wsize, credits);
3273                 if (rc)
3274                         break;
3275
3276                 cur_len = min_t(const size_t, len, wsize);
3277
3278                 if (ctx->direct_io) {
3279                         ssize_t result;
3280
3281                         result = iov_iter_get_pages_alloc(
3282                                 from, &pagevec, cur_len, &start);
3283                         if (result < 0) {
3284                                 cifs_dbg(VFS,
3285                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3286                                          result, iov_iter_type(from),
3287                                          from->iov_offset, from->count);
3288                                 dump_stack();
3289
3290                                 rc = result;
3291                                 add_credits_and_wake_if(server, credits, 0);
3292                                 break;
3293                         }
3294                         cur_len = (size_t)result;
3295                         iov_iter_advance(from, cur_len);
3296
3297                         nr_pages =
3298                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3299
3300                         wdata = cifs_writedata_direct_alloc(pagevec,
3301                                              cifs_uncached_writev_complete);
3302                         if (!wdata) {
3303                                 rc = -ENOMEM;
3304                                 for (i = 0; i < nr_pages; i++)
3305                                         put_page(pagevec[i]);
3306                                 kvfree(pagevec);
3307                                 add_credits_and_wake_if(server, credits, 0);
3308                                 break;
3309                         }
3310
3311
3312                         wdata->page_offset = start;
3313                         wdata->tailsz =
3314                                 nr_pages > 1 ?
3315                                         cur_len - (PAGE_SIZE - start) -
3316                                         (nr_pages - 2) * PAGE_SIZE :
3317                                         cur_len;
3318                 } else {
3319                         nr_pages = get_numpages(wsize, len, &cur_len);
3320                         wdata = cifs_writedata_alloc(nr_pages,
3321                                              cifs_uncached_writev_complete);
3322                         if (!wdata) {
3323                                 rc = -ENOMEM;
3324                                 add_credits_and_wake_if(server, credits, 0);
3325                                 break;
3326                         }
3327
3328                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3329                         if (rc) {
3330                                 kvfree(wdata->pages);
3331                                 kfree(wdata);
3332                                 add_credits_and_wake_if(server, credits, 0);
3333                                 break;
3334                         }
3335
3336                         num_pages = nr_pages;
3337                         rc = wdata_fill_from_iovec(
3338                                 wdata, from, &cur_len, &num_pages);
3339                         if (rc) {
3340                                 for (i = 0; i < nr_pages; i++)
3341                                         put_page(wdata->pages[i]);
3342                                 kvfree(wdata->pages);
3343                                 kfree(wdata);
3344                                 add_credits_and_wake_if(server, credits, 0);
3345                                 break;
3346                         }
3347
3348                         /*
3349                          * Bring nr_pages down to the number of pages we
3350                          * actually used, and free any pages that we didn't use.
3351                          */
3352                         for ( ; nr_pages > num_pages; nr_pages--)
3353                                 put_page(wdata->pages[nr_pages - 1]);
3354
3355                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3356                 }
3357
3358                 wdata->sync_mode = WB_SYNC_ALL;
3359                 wdata->nr_pages = nr_pages;
3360                 wdata->offset = (__u64)offset;
3361                 wdata->cfile = cifsFileInfo_get(open_file);
3362                 wdata->server = server;
3363                 wdata->pid = pid;
3364                 wdata->bytes = cur_len;
3365                 wdata->pagesz = PAGE_SIZE;
3366                 wdata->credits = credits_on_stack;
3367                 wdata->ctx = ctx;
3368                 kref_get(&ctx->refcount);
3369
3370                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3371
3372                 if (!rc) {
3373                         if (wdata->cfile->invalidHandle)
3374                                 rc = -EAGAIN;
3375                         else
3376                                 rc = server->ops->async_writev(wdata,
3377                                         cifs_uncached_writedata_release);
3378                 }
3379
3380                 if (rc) {
3381                         add_credits_and_wake_if(server, &wdata->credits, 0);
3382                         kref_put(&wdata->refcount,
3383                                  cifs_uncached_writedata_release);
3384                         if (rc == -EAGAIN) {
3385                                 *from = saved_from;
3386                                 iov_iter_advance(from, offset - saved_offset);
3387                                 continue;
3388                         }
3389                         break;
3390                 }
3391
3392                 list_add_tail(&wdata->list, wdata_list);
3393                 offset += cur_len;
3394                 len -= cur_len;
3395         } while (len > 0);
3396
3397         free_xid(xid);
3398         return rc;
3399 }
3400
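/*
 * Reap completed uncached writes in order of increasing offset,
 * resending any that failed with -EAGAIN. Runs under ctx->aio_mutex;
 * once everything is accounted for, either complete the async iocb or
 * wake the synchronous waiter.
 */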
3401 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3402 {
3403         struct cifs_writedata *wdata, *tmp;
3404         struct cifs_tcon *tcon;
3405         struct cifs_sb_info *cifs_sb;
3406         struct dentry *dentry = ctx->cfile->dentry;
3407         ssize_t rc;
3408
3409         tcon = tlink_tcon(ctx->cfile->tlink);
3410         cifs_sb = CIFS_SB(dentry->d_sb);
3411
3412         mutex_lock(&ctx->aio_mutex);
3413
3414         if (list_empty(&ctx->list)) {
3415                 mutex_unlock(&ctx->aio_mutex);
3416                 return;
3417         }
3418
3419         rc = ctx->rc;
3420         /*
3421          * Wait for and collect replies for any successful sends in order of
3422          * increasing offset. Once an error is hit, then return without waiting
3423          * for any more replies.
3424          */
3425 restart_loop:
3426         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3427                 if (!rc) {
3428                         if (!try_wait_for_completion(&wdata->done)) {
3429                                 mutex_unlock(&ctx->aio_mutex);
3430                                 return;
3431                         }
3432
3433                         if (wdata->result)
3434                                 rc = wdata->result;
3435                         else
3436                                 ctx->total_len += wdata->bytes;
3437
3438                         /* resend call if it's a retryable error */
3439                         if (rc == -EAGAIN) {
3440                                 struct list_head tmp_list;
3441                                 struct iov_iter tmp_from = ctx->iter;
3442
3443                                 INIT_LIST_HEAD(&tmp_list);
3444                                 list_del_init(&wdata->list);
3445
3446                                 if (ctx->direct_io)
3447                                         rc = cifs_resend_wdata(
3448                                                 wdata, &tmp_list, ctx);
3449                                 else {
3450                                         iov_iter_advance(&tmp_from,
3451                                                  wdata->offset - ctx->pos);
3452
3453                                         rc = cifs_write_from_iter(wdata->offset,
3454                                                 wdata->bytes, &tmp_from,
3455                                                 ctx->cfile, cifs_sb, &tmp_list,
3456                                                 ctx);
3457
3458                                         kref_put(&wdata->refcount,
3459                                                 cifs_uncached_writedata_release);
3460                                 }
3461
3462                                 list_splice(&tmp_list, &ctx->list);
3463                                 goto restart_loop;
3464                         }
3465                 }
3466                 list_del_init(&wdata->list);
3467                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3468         }
3469
3470         cifs_stats_bytes_written(tcon, ctx->total_len);
3471         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3472
3473         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3474
3475         mutex_unlock(&ctx->aio_mutex);
3476
3477         if (ctx->iocb && ctx->iocb->ki_complete)
3478                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3479         else
3480                 complete(&ctx->done);
3481 }
3482
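/*
 * Common body of the uncached/direct write paths: set up an aio
 * context, fan the iterator out into async write requests, then either
 * return -EIOCBQUEUED to an async caller or wait for the result here.
 */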
3483 static ssize_t __cifs_writev(
3484         struct kiocb *iocb, struct iov_iter *from, bool direct)
3485 {
3486         struct file *file = iocb->ki_filp;
3487         ssize_t total_written = 0;
3488         struct cifsFileInfo *cfile;
3489         struct cifs_tcon *tcon;
3490         struct cifs_sb_info *cifs_sb;
3491         struct cifs_aio_ctx *ctx;
3492         struct iov_iter saved_from = *from;
3493         size_t len = iov_iter_count(from);
3494         int rc;
3495
3496         /*
3497          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3498          * In this case, fall back to the non-direct write function.
3499          * This could be improved by getting pages directly in ITER_KVEC.
3500          */
3501         if (direct && iov_iter_is_kvec(from)) {
3502                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3503                 direct = false;
3504         }
3505
3506         rc = generic_write_checks(iocb, from);
3507         if (rc <= 0)
3508                 return rc;
3509
3510         cifs_sb = CIFS_FILE_SB(file);
3511         cfile = file->private_data;
3512         tcon = tlink_tcon(cfile->tlink);
3513
3514         if (!tcon->ses->server->ops->async_writev)
3515                 return -ENOSYS;
3516
3517         ctx = cifs_aio_ctx_alloc();
3518         if (!ctx)
3519                 return -ENOMEM;
3520
3521         ctx->cfile = cifsFileInfo_get(cfile);
3522
3523         if (!is_sync_kiocb(iocb))
3524                 ctx->iocb = iocb;
3525
3526         ctx->pos = iocb->ki_pos;
3527
3528         if (direct) {
3529                 ctx->direct_io = true;
3530                 ctx->iter = *from;
3531                 ctx->len = len;
3532         } else {
3533                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3534                 if (rc) {
3535                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3536                         return rc;
3537                 }
3538         }
3539
3540         /* grab a lock here since the write response handlers can access ctx */
3541         mutex_lock(&ctx->aio_mutex);
3542
3543         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3544                                   cfile, cifs_sb, &ctx->list, ctx);
3545
3546         /*
3547          * If at least one write was successfully sent, then discard any rc
3548          * value from the later writes. If the other writes succeed, then
3549          * we'll end up returning whatever was written. If they fail, then
3550          * we'll get a new rc value from that.
3551          */
3552         if (!list_empty(&ctx->list))
3553                 rc = 0;
3554
3555         mutex_unlock(&ctx->aio_mutex);
3556
3557         if (rc) {
3558                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3559                 return rc;
3560         }
3561
3562         if (!is_sync_kiocb(iocb)) {
3563                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3564                 return -EIOCBQUEUED;
3565         }
3566
3567         rc = wait_for_completion_killable(&ctx->done);
3568         if (rc) {
3569                 mutex_lock(&ctx->aio_mutex);
3570                 ctx->rc = rc = -EINTR;
3571                 total_written = ctx->total_len;
3572                 mutex_unlock(&ctx->aio_mutex);
3573         } else {
3574                 rc = ctx->rc;
3575                 total_written = ctx->total_len;
3576         }
3577
3578         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3579
3580         if (unlikely(!total_written))
3581                 return rc;
3582
3583         iocb->ki_pos += total_written;
3584         return total_written;
3585 }
3586
3587 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3588 {
3589         struct file *file = iocb->ki_filp;
3590
3591         cifs_revalidate_mapping(file->f_inode);
3592         return __cifs_writev(iocb, from, true);
3593 }
3594
3595 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3596 {
3597         return __cifs_writev(iocb, from, false);
3598 }
3599
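/*
 * Buffered write used when we hold an oplock/lease that allows write
 * caching. lock_sem is held so that no conflicting mandatory
 * byte-range lock can be set while the write is in flight.
 */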
3600 static ssize_t
3601 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3602 {
3603         struct file *file = iocb->ki_filp;
3604         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3605         struct inode *inode = file->f_mapping->host;
3606         struct cifsInodeInfo *cinode = CIFS_I(inode);
3607         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3608         ssize_t rc;
3609
3610         inode_lock(inode);
3611         /*
3612          * We need to hold the sem to be sure nobody modifies lock list
3613          * with a brlock that prevents writing.
3614          */
3615         down_read(&cinode->lock_sem);
3616
3617         rc = generic_write_checks(iocb, from);
3618         if (rc <= 0)
3619                 goto out;
3620
3621         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3622                                      server->vals->exclusive_lock_type, 0,
3623                                      NULL, CIFS_WRITE_OP))
3624                 rc = __generic_file_write_iter(iocb, from);
3625         else
3626                 rc = -EACCES;
3627 out:
3628         up_read(&cinode->lock_sem);
3629         inode_unlock(inode);
3630
3631         if (rc > 0)
3632                 rc = generic_write_sync(iocb, rc);
3633         return rc;
3634 }
3635
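/*
 * Entry point for writes in strict cache mode: register as a writer
 * (excluding concurrent oplock breaks), then write through the page
 * cache when write caching is allowed, falling back to uncached writes
 * (and zapping any read cache) when it is not.
 */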
3636 ssize_t
3637 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3638 {
3639         struct inode *inode = file_inode(iocb->ki_filp);
3640         struct cifsInodeInfo *cinode = CIFS_I(inode);
3641         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3642         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3643                                                 iocb->ki_filp->private_data;
3644         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3645         ssize_t written;
3646
3647         written = cifs_get_writer(cinode);
3648         if (written)
3649                 return written;
3650
3651         if (CIFS_CACHE_WRITE(cinode)) {
3652                 if (cap_unix(tcon->ses) &&
3653                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3654                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3655                         written = generic_file_write_iter(iocb, from);
3656                         goto out;
3657                 }
3658                 written = cifs_writev(iocb, from);
3659                 goto out;
3660         }
3661         /*
3662          * For non-oplocked files in strict cache mode we need to write the data
3663          * to the server exactly from pos to pos+len-1 rather than flush all
3664          * affected pages, because that may cause an error with mandatory locks
3665          * on these pages but not on the region from pos to pos+len-1.
3666          */
3667         written = cifs_user_writev(iocb, from);
3668         if (CIFS_CACHE_READ(cinode)) {
3669                 /*
3670                  * We have read level caching and we have just sent a write
3671                  * request to the server thus making data in the cache stale.
3672                  * Zap the cache and set oplock/lease level to NONE to avoid
3673                  * reading stale data from the cache. All subsequent read
3674                  * operations will read new data from the server.
3675                  */
3676                 cifs_zap_mapping(inode);
3677                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3678                          inode);
3679                 cinode->oplock = 0;
3680         }
3681 out:
3682         cifs_put_writer(cinode);
3683         return written;
3684 }
3685
3686 static struct cifs_readdata *
3687 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3688 {
3689         struct cifs_readdata *rdata;
3690
3691         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3692         if (rdata != NULL) {
3693                 rdata->pages = pages;
3694                 kref_init(&rdata->refcount);
3695                 INIT_LIST_HEAD(&rdata->list);
3696                 init_completion(&rdata->done);
3697                 INIT_WORK(&rdata->work, complete);
3698         }
3699
3700         return rdata;
3701 }
3702
3703 static struct cifs_readdata *
3704 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3705 {
3706         struct page **pages =
3707                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3708         struct cifs_readdata *ret = NULL;
3709
3710         if (pages) {
3711                 ret = cifs_readdata_direct_alloc(pages, complete);
3712                 if (!ret)
3713                         kfree(pages);
3714         }
3715
3716         return ret;
3717 }
3718
3719 void
3720 cifs_readdata_release(struct kref *refcount)
3721 {
3722         struct cifs_readdata *rdata = container_of(refcount,
3723                                         struct cifs_readdata, refcount);
3724 #ifdef CONFIG_CIFS_SMB_DIRECT
3725         if (rdata->mr) {
3726                 smbd_deregister_mr(rdata->mr);
3727                 rdata->mr = NULL;
3728         }
3729 #endif
3730         if (rdata->cfile)
3731                 cifsFileInfo_put(rdata->cfile);
3732
3733         kvfree(rdata->pages);
3734         kfree(rdata);
3735 }
3736
3737 static int
3738 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3739 {
3740         int rc = 0;
3741         struct page *page;
3742         unsigned int i;
3743
3744         for (i = 0; i < nr_pages; i++) {
3745                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3746                 if (!page) {
3747                         rc = -ENOMEM;
3748                         break;
3749                 }
3750                 rdata->pages[i] = page;
3751         }
3752
3753         if (rc) {
3754                 unsigned int nr_page_failed = i;
3755
3756                 for (i = 0; i < nr_page_failed; i++) {
3757                         put_page(rdata->pages[i]);
3758                         rdata->pages[i] = NULL;
3759                 }
3760         }
3761         return rc;
3762 }
3763
3764 static void
3765 cifs_uncached_readdata_release(struct kref *refcount)
3766 {
3767         struct cifs_readdata *rdata = container_of(refcount,
3768                                         struct cifs_readdata, refcount);
3769         unsigned int i;
3770
3771         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3772         for (i = 0; i < rdata->nr_pages; i++) {
3773                 put_page(rdata->pages[i]);
3774         }
3775         cifs_readdata_release(refcount);
3776 }
3777
3778 /**
3779  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3780  * @rdata:      the readdata response with list of pages holding data
3781  * @iter:       destination for our data
3782  *
3783  * This function copies data from a list of pages in a readdata response into
3784  * an array of iovecs. It will first calculate where the data should go
3785  * based on the info in the readdata and then copy the data into that spot.
3786  */
3787 static int
3788 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3789 {
3790         size_t remaining = rdata->got_bytes;
3791         unsigned int i;
3792
3793         for (i = 0; i < rdata->nr_pages; i++) {
3794                 struct page *page = rdata->pages[i];
3795                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3796                 size_t written;
3797
3798                 if (unlikely(iov_iter_is_pipe(iter))) {
3799                         void *addr = kmap_atomic(page);
3800
3801                         written = copy_to_iter(addr, copy, iter);
3802                         kunmap_atomic(addr);
3803                 } else
3804                         written = copy_page_to_iter(page, 0, copy, iter);
3805                 remaining -= written;
3806                 if (written < copy && iov_iter_count(iter) > 0)
3807                         break;
3808         }
3809         return remaining ? -EFAULT : 0;
3810 }
3811
3812 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3813
3814 static void
3815 cifs_uncached_readv_complete(struct work_struct *work)
3816 {
3817         struct cifs_readdata *rdata = container_of(work,
3818                                                 struct cifs_readdata, work);
3819
3820         complete(&rdata->done);
3821         collect_uncached_read_data(rdata->ctx);
3822         /* the below call can possibly free the last ref to aio ctx */
3823         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3824 }
3825
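/*
 * Distribute len bytes of a read response across rdata's pages,
 * reading from the socket, copying from a caller-supplied iter (e.g. a
 * decrypted response), or (for smbdirect) accepting data already
 * placed by RDMA. Trailing pages that turn out to be unneeded are
 * released.
 */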
3826 static int
3827 uncached_fill_pages(struct TCP_Server_Info *server,
3828                     struct cifs_readdata *rdata, struct iov_iter *iter,
3829                     unsigned int len)
3830 {
3831         int result = 0;
3832         unsigned int i;
3833         unsigned int nr_pages = rdata->nr_pages;
3834         unsigned int page_offset = rdata->page_offset;
3835
3836         rdata->got_bytes = 0;
3837         rdata->tailsz = PAGE_SIZE;
3838         for (i = 0; i < nr_pages; i++) {
3839                 struct page *page = rdata->pages[i];
3840                 size_t n;
3841                 unsigned int segment_size = rdata->pagesz;
3842
3843                 if (i == 0)
3844                         segment_size -= page_offset;
3845                 else
3846                         page_offset = 0;
3847
3848
3849                 if (len <= 0) {
3850                         /* no need to hold page hostage */
3851                         rdata->pages[i] = NULL;
3852                         rdata->nr_pages--;
3853                         put_page(page);
3854                         continue;
3855                 }
3856
3857                 n = len;
3858                 if (len >= segment_size)
3859                         /* enough data to fill the page */
3860                         n = segment_size;
3861                 else
3862                         rdata->tailsz = len;
3863                 len -= n;
3864
3865                 if (iter)
3866                         result = copy_page_from_iter(
3867                                         page, page_offset, n, iter);
3868 #ifdef CONFIG_CIFS_SMB_DIRECT
3869                 else if (rdata->mr)
3870                         result = n;
3871 #endif
3872                 else
3873                         result = cifs_read_page_from_socket(
3874                                         server, page, page_offset, n);
3875                 if (result < 0)
3876                         break;
3877
3878                 rdata->got_bytes += result;
3879         }
3880
3881         return result != -ECONNABORTED && rdata->got_bytes > 0 ?
3882                                                 rdata->got_bytes : result;
3883 }
3884
3885 static int
3886 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3887                               struct cifs_readdata *rdata, unsigned int len)
3888 {
3889         return uncached_fill_pages(server, rdata, NULL, len);
3890 }
3891
3892 static int
3893 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3894                               struct cifs_readdata *rdata,
3895                               struct iov_iter *iter)
3896 {
3897         return uncached_fill_pages(server, rdata, iter, iter->count);
3898 }
3899
3900 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3901                         struct list_head *rdata_list,
3902                         struct cifs_aio_ctx *ctx)
3903 {
3904         unsigned int rsize;
3905         struct cifs_credits credits;
3906         int rc;
3907         struct TCP_Server_Info *server;
3908
3909         /* XXX: should we pick a new channel here? */
3910         server = rdata->server;
3911
3912         do {
3913                 if (rdata->cfile->invalidHandle) {
3914                         rc = cifs_reopen_file(rdata->cfile, true);
3915                         if (rc == -EAGAIN)
3916                                 continue;
3917                         else if (rc)
3918                                 break;
3919                 }
3920
3921                 /*
3922                  * Wait for credits to resend this rdata.
3923                  * Note: we are attempting to resend the whole rdata,
3924                  * not in segments
3925                  */
3926                 do {
3927                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3928                                                 &rsize, &credits);
3929
3930                         if (rc)
3931                                 goto fail;
3932
3933                         if (rsize < rdata->bytes) {
3934                                 add_credits_and_wake_if(server, &credits, 0);
3935                                 msleep(1000);
3936                         }
3937                 } while (rsize < rdata->bytes);
3938                 rdata->credits = credits;
3939
3940                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3941                 if (!rc) {
3942                         if (rdata->cfile->invalidHandle)
3943                                 rc = -EAGAIN;
3944                         else {
3945 #ifdef CONFIG_CIFS_SMB_DIRECT
3946                                 if (rdata->mr) {
3947                                         rdata->mr->need_invalidate = true;
3948                                         smbd_deregister_mr(rdata->mr);
3949                                         rdata->mr = NULL;
3950                                 }
3951 #endif
3952                                 rc = server->ops->async_readv(rdata);
3953                         }
3954                 }
3955
3956                 /* If the read was successfully sent, we are done */
3957                 if (!rc) {
3958                         /* Add to aio pending list */
3959                         list_add_tail(&rdata->list, rdata_list);
3960                         return 0;
3961                 }
3962
3963                 /* Roll back credits and retry if needed */
3964                 add_credits_and_wake_if(server, &rdata->credits, 0);
3965         } while (rc == -EAGAIN);
3966
3967 fail:
3968         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3969         return rc;
3970 }
3971
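/*
 * Read-side counterpart of cifs_write_from_iter(): split the request
 * into rsize-bounded rdata requests, pinning user pages for direct I/O
 * or allocating bounce pages otherwise, and queue each successfully
 * sent rdata on rdata_list for the collector.
 */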
3972 static int
3973 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3974                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3975                      struct cifs_aio_ctx *ctx)
3976 {
3977         struct cifs_readdata *rdata;
3978         unsigned int npages, rsize;
3979         struct cifs_credits credits_on_stack;
3980         struct cifs_credits *credits = &credits_on_stack;
3981         size_t cur_len;
3982         int rc;
3983         pid_t pid;
3984         struct TCP_Server_Info *server;
3985         struct page **pagevec;
3986         size_t start;
3987         struct iov_iter direct_iov = ctx->iter;
3988
3989         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3990
3991         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3992                 pid = open_file->pid;
3993         else
3994                 pid = current->tgid;
3995
3996         if (ctx->direct_io)
3997                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3998
3999         do {
4000                 if (open_file->invalidHandle) {
4001                         rc = cifs_reopen_file(open_file, true);
4002                         if (rc == -EAGAIN)
4003                                 continue;
4004                         else if (rc)
4005                                 break;
4006                 }
4007
4008                 if (cifs_sb->ctx->rsize == 0)
4009                         cifs_sb->ctx->rsize =
4010                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4011                                                              cifs_sb->ctx);
4012
4013                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4014                                                    &rsize, credits);
4015                 if (rc)
4016                         break;
4017
4018                 cur_len = min_t(const size_t, len, rsize);
4019
4020                 if (ctx->direct_io) {
4021                         ssize_t result;
4022
4023                         result = iov_iter_get_pages_alloc(
4024                                         &direct_iov, &pagevec,
4025                                         cur_len, &start);
4026                         if (result < 0) {
4027                                 cifs_dbg(VFS,
4028                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
4029                                          result, iov_iter_type(&direct_iov),
4030                                          direct_iov.iov_offset,
4031                                          direct_iov.count);
4032                                 dump_stack();
4033
4034                                 rc = result;
4035                                 add_credits_and_wake_if(server, credits, 0);
4036                                 break;
4037                         }
4038                         cur_len = (size_t)result;
4039                         iov_iter_advance(&direct_iov, cur_len);
4040
4041                         rdata = cifs_readdata_direct_alloc(
4042                                         pagevec, cifs_uncached_readv_complete);
4043                         if (!rdata) {
4044                                 add_credits_and_wake_if(server, credits, 0);
4045                                 rc = -ENOMEM;
4046                                 break;
4047                         }
4048
4049                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
4050                         rdata->page_offset = start;
4051                         rdata->tailsz = npages > 1 ?
4052                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
4053                                 cur_len;
4054
4055                 } else {
4056
4057                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
4058                         /* allocate a readdata struct */
4059                         rdata = cifs_readdata_alloc(npages,
4060                                             cifs_uncached_readv_complete);
4061                         if (!rdata) {
4062                                 add_credits_and_wake_if(server, credits, 0);
4063                                 rc = -ENOMEM;
4064                                 break;
4065                         }
4066
4067                         rc = cifs_read_allocate_pages(rdata, npages);
4068                         if (rc) {
4069                                 kvfree(rdata->pages);
4070                                 kfree(rdata);
4071                                 add_credits_and_wake_if(server, credits, 0);
4072                                 break;
4073                         }
4074
4075                         rdata->tailsz = PAGE_SIZE;
4076                 }
4077
4078                 rdata->server = server;
4079                 rdata->cfile = cifsFileInfo_get(open_file);
4080                 rdata->nr_pages = npages;
4081                 rdata->offset = offset;
4082                 rdata->bytes = cur_len;
4083                 rdata->pid = pid;
4084                 rdata->pagesz = PAGE_SIZE;
4085                 rdata->read_into_pages = cifs_uncached_read_into_pages;
4086                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
4087                 rdata->credits = credits_on_stack;
4088                 rdata->ctx = ctx;
4089                 kref_get(&ctx->refcount);
4090
4091                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4092
4093                 if (!rc) {
4094                         if (rdata->cfile->invalidHandle)
4095                                 rc = -EAGAIN;
4096                         else
4097                                 rc = server->ops->async_readv(rdata);
4098                 }
4099
4100                 if (rc) {
4101                         add_credits_and_wake_if(server, &rdata->credits, 0);
4102                         kref_put(&rdata->refcount,
4103                                 cifs_uncached_readdata_release);
4104                         if (rc == -EAGAIN) {
4105                                 iov_iter_revert(&direct_iov, cur_len);
4106                                 continue;
4107                         }
4108                         break;
4109                 }
4110
4111                 list_add_tail(&rdata->list, rdata_list);
4112                 offset += cur_len;
4113                 len -= cur_len;
4114         } while (len > 0);
4115
4116         return rc;
4117 }
4118
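/*
 * Reap completed uncached reads under ctx->aio_mutex, copying data out
 * to the caller's iterator in the non-direct case. A partial read that
 * hit a reconnect is resent for the remainder; any other short read
 * ends the request with what was transferred so far.
 */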
4119 static void
4120 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4121 {
4122         struct cifs_readdata *rdata, *tmp;
4123         struct iov_iter *to = &ctx->iter;
4124         struct cifs_sb_info *cifs_sb;
4125         int rc;
4126
4127         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4128
4129         mutex_lock(&ctx->aio_mutex);
4130
4131         if (list_empty(&ctx->list)) {
4132                 mutex_unlock(&ctx->aio_mutex);
4133                 return;
4134         }
4135
4136         rc = ctx->rc;
4137         /* the loop below should proceed in the order of increasing offsets */
4138 again:
4139         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4140                 if (!rc) {
4141                         if (!try_wait_for_completion(&rdata->done)) {
4142                                 mutex_unlock(&ctx->aio_mutex);
4143                                 return;
4144                         }
4145
4146                         if (rdata->result == -EAGAIN) {
4147                                 /* resend call if it's a retryable error */
4148                                 struct list_head tmp_list;
4149                                 unsigned int got_bytes = rdata->got_bytes;
4150
4151                                 list_del_init(&rdata->list);
4152                                 INIT_LIST_HEAD(&tmp_list);
4153
4154                                 /*
4155                                  * We got part of the data and then a reconnect
4156                                  * happened -- fill the buffer and continue
4157                                  * reading.
4158                                  */
4159                                 if (got_bytes && got_bytes < rdata->bytes) {
4160                                         rc = 0;
4161                                         if (!ctx->direct_io)
4162                                                 rc = cifs_readdata_to_iov(rdata, to);
4163                                         if (rc) {
4164                                                 kref_put(&rdata->refcount,
4165                                                         cifs_uncached_readdata_release);
4166                                                 continue;
4167                                         }
4168                                 }
4169
4170                                 if (ctx->direct_io) {
4171                                         /*
4172                                          * Re-use rdata as this is a
4173                                          * direct I/O
4174                                          */
4175                                         rc = cifs_resend_rdata(
4176                                                 rdata,
4177                                                 &tmp_list, ctx);
4178                                 } else {
4179                                         rc = cifs_send_async_read(
4180                                                 rdata->offset + got_bytes,
4181                                                 rdata->bytes - got_bytes,
4182                                                 rdata->cfile, cifs_sb,
4183                                                 &tmp_list, ctx);
4184
4185                                         kref_put(&rdata->refcount,
4186                                                 cifs_uncached_readdata_release);
4187                                 }
4188
4189                                 list_splice(&tmp_list, &ctx->list);
4190
4191                                 goto again;
4192                         } else if (rdata->result)
4193                                 rc = rdata->result;
4194                         else if (!ctx->direct_io)
4195                                 rc = cifs_readdata_to_iov(rdata, to);
4196
4197                         /* if there was a short read -- discard anything left */
4198                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4199                                 rc = -ENODATA;
4200
4201                         ctx->total_len += rdata->got_bytes;
4202                 }
4203                 list_del_init(&rdata->list);
4204                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
4205         }
4206
4207         if (!ctx->direct_io)
4208                 ctx->total_len = ctx->len - iov_iter_count(to);
4209
4210         /* mask nodata case */
4211         if (rc == -ENODATA)
4212                 rc = 0;
4213
4214         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4215
4216         mutex_unlock(&ctx->aio_mutex);
4217
4218         if (ctx->iocb && ctx->iocb->ki_complete)
4219                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
4220         else
4221                 complete(&ctx->done);
4222 }
4223
4224 static ssize_t __cifs_readv(
4225         struct kiocb *iocb, struct iov_iter *to, bool direct)
4226 {
4227         size_t len;
4228         struct file *file = iocb->ki_filp;
4229         struct cifs_sb_info *cifs_sb;
4230         struct cifsFileInfo *cfile;
4231         struct cifs_tcon *tcon;
4232         ssize_t rc, total_read = 0;
4233         loff_t offset = iocb->ki_pos;
4234         struct cifs_aio_ctx *ctx;
4235
4236         /*
4237          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC;
4238          * fall back to the data copy read path. This could be
4239          * improved by getting pages directly in ITER_KVEC.
4240          */
4241         if (direct && iov_iter_is_kvec(to)) {
4242                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
4243                 direct = false;
4244         }
4245
4246         len = iov_iter_count(to);
4247         if (!len)
4248                 return 0;
4249
4250         cifs_sb = CIFS_FILE_SB(file);
4251         cfile = file->private_data;
4252         tcon = tlink_tcon(cfile->tlink);
4253
4254         if (!tcon->ses->server->ops->async_readv)
4255                 return -ENOSYS;
4256
4257         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4258                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4259
4260         ctx = cifs_aio_ctx_alloc();
4261         if (!ctx)
4262                 return -ENOMEM;
4263
4264         ctx->cfile = cifsFileInfo_get(cfile);
4265
4266         if (!is_sync_kiocb(iocb))
4267                 ctx->iocb = iocb;
4268
4269         if (iter_is_iovec(to))
4270                 ctx->should_dirty = true;
4271
4272         if (direct) {
4273                 ctx->pos = offset;
4274                 ctx->direct_io = true;
4275                 ctx->iter = *to;
4276                 ctx->len = len;
4277         } else {
4278                 rc = setup_aio_ctx_iter(ctx, to, READ);
4279                 if (rc) {
4280                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4281                         return rc;
4282                 }
4283                 len = ctx->len;
4284         }
4285
4286         if (direct) {
4287                 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4288                                                   offset, offset + len - 1);
4289                 if (rc) {
4290                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4291                         return -EAGAIN;
4292                 }
4293         }
4294
4295         /* grab a lock here because read response handlers can access ctx */
4296         mutex_lock(&ctx->aio_mutex);
4297
4298         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4299
4300         /* if sending at least one read request succeeded, then reset rc */
4301         if (!list_empty(&ctx->list))
4302                 rc = 0;
4303
4304         mutex_unlock(&ctx->aio_mutex);
4305
4306         if (rc) {
4307                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4308                 return rc;
4309         }
4310
4311         if (!is_sync_kiocb(iocb)) {
4312                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4313                 return -EIOCBQUEUED;
4314         }
4315
4316         rc = wait_for_completion_killable(&ctx->done);
4317         if (rc) {
4318                 mutex_lock(&ctx->aio_mutex);
4319                 ctx->rc = rc = -EINTR;
4320                 total_read = ctx->total_len;
4321                 mutex_unlock(&ctx->aio_mutex);
4322         } else {
4323                 rc = ctx->rc;
4324                 total_read = ctx->total_len;
4325         }
4326
4327         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4328
4329         if (total_read) {
4330                 iocb->ki_pos += total_read;
4331                 return total_read;
4332         }
4333         return rc;
4334 }
4335
4336 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4337 {
4338         return __cifs_readv(iocb, to, true);
4339 }
4340
4341 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4342 {
4343         return __cifs_readv(iocb, to, false);
4344 }
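
/*
 * Illustrative, user-space-only sketch (not part of this file): how an
 * application reaches the entry points above. Depending on mount options
 * and open flags (the dispatch lives in cifsfs.c), reads land in
 * cifs_user_readv(), cifs_direct_readv(), or cifs_strict_readv(). The
 * path "/mnt/cifs/testfile" and the 4096-byte size are assumptions.
 */
#if 0	/* example only -- never compiled into this file */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	char *buf;
	ssize_t n;
	int fd;

	/* O_DIRECT requires an aligned buffer; align to the page size */
	if (posix_memalign((void **)&buf, 4096, 4096))
		return 1;

	fd = open("/mnt/cifs/testfile", O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	n = pread(fd, buf, 4096, 0);	/* direct read, bypasses page cache */
	if (n < 0)
		perror("pread");
	else
		printf("read %zd bytes\n", n);

	close(fd);
	free(buf);
	return 0;
}
#endif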
4345
4346 ssize_t
4347 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4348 {
4349         struct inode *inode = file_inode(iocb->ki_filp);
4350         struct cifsInodeInfo *cinode = CIFS_I(inode);
4351         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4352         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4353                                                 iocb->ki_filp->private_data;
4354         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4355         int rc = -EACCES;
4356
4357         /*
4358          * In strict cache mode we need to read from the server every time
4359          * if we don't have a level II oplock, because the server can delay
4360          * the mtime change -- so we can't decide whether to invalidate the
4361          * inode. We can also fail reading pages if there are mandatory
4362          * locks on pages affected by this read but not on the region from
4363          * pos to pos+len-1.
4364          */
4365         if (!CIFS_CACHE_READ(cinode))
4366                 return cifs_user_readv(iocb, to);
4367
4368         if (cap_unix(tcon->ses) &&
4369             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4370             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4371                 return generic_file_read_iter(iocb, to);
4372
4373         /*
4374          * We need to hold the sem to be sure nobody modifies the lock
4375          * list with a brlock that prevents reading.
4376          */
4377         down_read(&cinode->lock_sem);
4378         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4379                                      tcon->ses->server->vals->shared_lock_type,
4380                                      0, NULL, CIFS_READ_OP))
4381                 rc = generic_file_read_iter(iocb, to);
4382         up_read(&cinode->lock_sem);
4383         return rc;
4384 }
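
/*
 * Illustrative, user-space-only sketch (not part of this file): posting a
 * byte-range lock with fcntl(). Unless the share is mounted with "nobrl",
 * CIFS forwards such locks to the server, where they are mandatory --
 * which is why cifs_strict_readv() above checks for conflicts before
 * reading from the page cache. The file path is an assumption.
 */
#if 0	/* example only -- never compiled into this file */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct flock fl = {
		.l_type = F_WRLCK,	/* exclusive lock */
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 4096,		/* lock the first page */
	};
	int fd = open("/mnt/cifs/testfile", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (fcntl(fd, F_SETLK, &fl) < 0)
		perror("fcntl");
	/* overlapping reads from other clients may now be refused */
	pause();	/* hold the lock until interrupted */
	close(fd);
	return 0;
}
#endif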
4385
4386 static ssize_t
4387 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4388 {
4389         int rc = -EACCES;
4390         unsigned int bytes_read = 0;
4391         unsigned int total_read;
4392         unsigned int current_read_size;
4393         unsigned int rsize;
4394         struct cifs_sb_info *cifs_sb;
4395         struct cifs_tcon *tcon;
4396         struct TCP_Server_Info *server;
4397         unsigned int xid;
4398         char *cur_offset;
4399         struct cifsFileInfo *open_file;
4400         struct cifs_io_parms io_parms = {0};
4401         int buf_type = CIFS_NO_BUFFER;
4402         __u32 pid;
4403
4404         xid = get_xid();
4405         cifs_sb = CIFS_FILE_SB(file);
4406
4407         /* FIXME: set up handlers for larger reads and/or convert to async */
4408         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4409
4410         if (file->private_data == NULL) {
4411                 rc = -EBADF;
4412                 free_xid(xid);
4413                 return rc;
4414         }
4415         open_file = file->private_data;
4416         tcon = tlink_tcon(open_file->tlink);
4417         server = cifs_pick_channel(tcon->ses);
4418
4419         if (!server->ops->sync_read) {
4420                 free_xid(xid);
4421                 return -ENOSYS;
4422         }
4423
4424         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4425                 pid = open_file->pid;
4426         else
4427                 pid = current->tgid;
4428
4429         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4430                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4431
4432         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4433              total_read += bytes_read, cur_offset += bytes_read) {
4434                 do {
4435                         current_read_size = min_t(uint, read_size - total_read,
4436                                                   rsize);
4437                         /*
4438                          * For Windows ME and 9x we do not want to request
4439                          * more than the server negotiated, since it will
4440                          * refuse the read otherwise.
4441                          */
4442                         if (!(tcon->ses->capabilities &
4443                                 tcon->ses->server->vals->cap_large_files)) {
4444                                 current_read_size = min_t(uint,
4445                                         current_read_size, CIFSMaxBufSize);
4446                         }
4447                         if (open_file->invalidHandle) {
4448                                 rc = cifs_reopen_file(open_file, true);
4449                                 if (rc != 0)
4450                                         break;
4451                         }
4452                         io_parms.pid = pid;
4453                         io_parms.tcon = tcon;
4454                         io_parms.offset = *offset;
4455                         io_parms.length = current_read_size;
4456                         io_parms.server = server;
4457                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4458                                                     &bytes_read, &cur_offset,
4459                                                     &buf_type);
4460                 } while (rc == -EAGAIN);
4461
4462                 if (rc || (bytes_read == 0)) {
4463                         if (total_read) {
4464                                 break;
4465                         } else {
4466                                 free_xid(xid);
4467                                 return rc;
4468                         }
4469                 } else {
4470                         cifs_stats_bytes_read(tcon, total_read);
4471                         *offset += bytes_read;
4472                 }
4473         }
4474         free_xid(xid);
4475         return total_read;
4476 }
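
/*
 * Illustrative, user-space-only sketch (not part of this file): the same
 * chunk-and-advance shape as cifs_read() above -- never ask for more than
 * a negotiated maximum per request, advance by what actually arrived, and
 * stop on error or EOF. CHUNK is a stand-in for the negotiated rsize.
 */
#if 0	/* example only -- never compiled into this file */
#define _XOPEN_SOURCE 700
#include <sys/types.h>
#include <unistd.h>

#define CHUNK 16384	/* stand-in for rsize */

static ssize_t read_in_chunks(int fd, char *buf, size_t len, off_t off)
{
	size_t total = 0;

	while (total < len) {
		size_t want = len - total;
		ssize_t got;

		if (want > CHUNK)
			want = CHUNK;
		got = pread(fd, buf + total, want, off + total);
		if (got < 0)	/* error: report it only if nothing read */
			return total ? (ssize_t)total : got;
		if (got == 0)	/* EOF */
			break;
		total += got;
	}
	return total;
}
#endif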
4477
4478 /*
4479  * If the page is mmap'ed into a process' page tables, then we need to make
4480  * sure that it doesn't change while being written back.
4481  */
4482 static vm_fault_t
4483 cifs_page_mkwrite(struct vm_fault *vmf)
4484 {
4485         struct page *page = vmf->page;
4486         struct file *file = vmf->vma->vm_file;
4487         struct inode *inode = file_inode(file);
4488
4489         cifs_fscache_wait_on_page_write(inode, page);
4490
4491         lock_page(page);
4492         return VM_FAULT_LOCKED;
4493 }
4494
4495 static const struct vm_operations_struct cifs_file_vm_ops = {
4496         .fault = filemap_fault,
4497         .map_pages = filemap_map_pages,
4498         .page_mkwrite = cifs_page_mkwrite,
4499 };
4500
4501 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4502 {
4503         int xid, rc = 0;
4504         struct inode *inode = file_inode(file);
4505
4506         xid = get_xid();
4507
4508         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4509                 rc = cifs_zap_mapping(inode);
4510         if (!rc)
4511                 rc = generic_file_mmap(file, vma);
4512         if (!rc)
4513                 vma->vm_ops = &cifs_file_vm_ops;
4514
4515         free_xid(xid);
4516         return rc;
4517 }
4518
4519 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4520 {
4521         int rc, xid;
4522
4523         xid = get_xid();
4524
4525         rc = cifs_revalidate_file(file);
4526         if (rc)
4527                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4528                          rc);
4529         if (!rc)
4530                 rc = generic_file_mmap(file, vma);
4531         if (!rc)
4532                 vma->vm_ops = &cifs_file_vm_ops;
4533
4534         free_xid(xid);
4535         return rc;
4536 }
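
/*
 * Illustrative, user-space-only sketch (not part of this file): a shared
 * writable mapping. The first store into a clean page faults into
 * cifs_page_mkwrite() above, which waits for any fscache write and
 * returns with the page locked. Path and length are assumptions; the
 * file is assumed to be at least one page long.
 */
#if 0	/* example only -- never compiled into this file */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	char *p;
	int fd = open("/mnt/cifs/testfile", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memcpy(p, "hello", 5);		/* faults into ->page_mkwrite */
	msync(p, 4096, MS_SYNC);	/* push the dirty page back */
	munmap(p, 4096);
	close(fd);
	return 0;
}
#endif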
4537
4538 static void
4539 cifs_readv_complete(struct work_struct *work)
4540 {
4541         unsigned int i, got_bytes;
4542         struct cifs_readdata *rdata = container_of(work,
4543                                                 struct cifs_readdata, work);
4544
4545         got_bytes = rdata->got_bytes;
4546         for (i = 0; i < rdata->nr_pages; i++) {
4547                 struct page *page = rdata->pages[i];
4548
4549                 lru_cache_add(page);
4550
4551                 if (rdata->result == 0 ||
4552                     (rdata->result == -EAGAIN && got_bytes)) {
4553                         flush_dcache_page(page);
4554                         SetPageUptodate(page);
4555                 } else
4556                         SetPageError(page);
4557
4558                 unlock_page(page);
4559
4560                 if (rdata->result == 0 ||
4561                     (rdata->result == -EAGAIN && got_bytes))
4562                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4563                 else
4564                         cifs_fscache_uncache_page(rdata->mapping->host, page);
4565
4566                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4567
4568                 put_page(page);
4569                 rdata->pages[i] = NULL;
4570         }
4571         kref_put(&rdata->refcount, cifs_readdata_release);
4572 }
4573
4574 static int
4575 readpages_fill_pages(struct TCP_Server_Info *server,
4576                      struct cifs_readdata *rdata, struct iov_iter *iter,
4577                      unsigned int len)
4578 {
4579         int result = 0;
4580         unsigned int i;
4581         u64 eof;
4582         pgoff_t eof_index;
4583         unsigned int nr_pages = rdata->nr_pages;
4584         unsigned int page_offset = rdata->page_offset;
4585
4586         /* determine the eof that the server (probably) has */
4587         eof = CIFS_I(rdata->mapping->host)->server_eof;
4588         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4589         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4590
4591         rdata->got_bytes = 0;
4592         rdata->tailsz = PAGE_SIZE;
4593         for (i = 0; i < nr_pages; i++) {
4594                 struct page *page = rdata->pages[i];
4595                 unsigned int to_read = rdata->pagesz;
4596                 size_t n;
4597
4598                 if (i == 0)
4599                         to_read -= page_offset;
4600                 else
4601                         page_offset = 0;
4602
4603                 n = to_read;
4604
4605                 if (len >= to_read) {
4606                         len -= to_read;
4607                 } else if (len > 0) {
4608                         /* enough for partial page, fill and zero the rest */
4609                         zero_user(page, len + page_offset, to_read - len);
4610                         n = rdata->tailsz = len;
4611                         len = 0;
4612                 } else if (page->index > eof_index) {
4613                         /*
4614                          * The VFS will not try to do readahead past the
4615                          * i_size, but it's possible that we have outstanding
4616                          * writes with gaps in the middle and the i_size hasn't
4617                          * caught up yet. Populate those with zeroed out pages
4618                          * to prevent the VFS from repeatedly attempting to
4619                          * fill them until the writes are flushed.
4620                          */
4621                         zero_user(page, 0, PAGE_SIZE);
4622                         lru_cache_add(page);
4623                         flush_dcache_page(page);
4624                         SetPageUptodate(page);
4625                         unlock_page(page);
4626                         put_page(page);
4627                         rdata->pages[i] = NULL;
4628                         rdata->nr_pages--;
4629                         continue;
4630                 } else {
4631                         /* no need to hold page hostage */
4632                         lru_cache_add(page);
4633                         unlock_page(page);
4634                         put_page(page);
4635                         rdata->pages[i] = NULL;
4636                         rdata->nr_pages--;
4637                         continue;
4638                 }
4639
4640                 if (iter)
4641                         result = copy_page_from_iter(
4642                                         page, page_offset, n, iter);
4643 #ifdef CONFIG_CIFS_SMB_DIRECT
4644                 else if (rdata->mr)
4645                         result = n;
4646 #endif
4647                 else
4648                         result = cifs_read_page_from_socket(
4649                                         server, page, page_offset, n);
4650                 if (result < 0)
4651                         break;
4652
4653                 rdata->got_bytes += result;
4654         }
4655
4656         return result != -ECONNABORTED && rdata->got_bytes > 0 ?
4657                                                 rdata->got_bytes : result;
4658 }
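
/*
 * Worked example for readpages_fill_pages() above (a sketch, not part of
 * this file, and ignoring any initial page_offset): how "len" bytes from
 * the server land in a run of 4096-byte pages -- full pages first, then
 * one partial tail page whose remainder is zero-filled, matching the
 * zero_user()/tailsz logic.
 */
#if 0	/* example only -- never compiled into this file */
#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 4096

int main(void)
{
	unsigned int len = 10000;	/* bytes the server returned */
	unsigned int full = len / EXAMPLE_PAGE_SIZE;
	unsigned int tailsz = len % EXAMPLE_PAGE_SIZE;

	/* 10000 bytes -> 2 full pages, 1808-byte tail, 2288 bytes zeroed */
	printf("%u full pages, tail of %u bytes, %u bytes zeroed\n",
	       full, tailsz, EXAMPLE_PAGE_SIZE - tailsz);
	return 0;
}
#endif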
4659
4660 static int
4661 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4662                                struct cifs_readdata *rdata, unsigned int len)
4663 {
4664         return readpages_fill_pages(server, rdata, NULL, len);
4665 }
4666
4667 static int
4668 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4669                                struct cifs_readdata *rdata,
4670                                struct iov_iter *iter)
4671 {
4672         return readpages_fill_pages(server, rdata, iter, iter->count);
4673 }
4674
4675 static int
4676 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4677                     unsigned int rsize, struct list_head *tmplist,
4678                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4679 {
4680         struct page *page, *tpage;
4681         unsigned int expected_index;
4682         int rc;
4683         gfp_t gfp = readahead_gfp_mask(mapping);
4684
4685         INIT_LIST_HEAD(tmplist);
4686
4687         page = lru_to_page(page_list);
4688
4689         /*
4690          * Lock the page and put it in the cache. Since no one else
4691          * should have access to this page, we're safe to simply set
4692          * PG_locked without checking it first.
4693          */
4694         __SetPageLocked(page);
4695         rc = add_to_page_cache_locked(page, mapping,
4696                                       page->index, gfp);
4697
4698         /* give up if we can't stick it in the cache */
4699         if (rc) {
4700                 __ClearPageLocked(page);
4701                 return rc;
4702         }
4703
4704         /* move first page to the tmplist */
4705         *offset = (loff_t)page->index << PAGE_SHIFT;
4706         *bytes = PAGE_SIZE;
4707         *nr_pages = 1;
4708         list_move_tail(&page->lru, tmplist);
4709
4710         /* now try and add more pages onto the request */
4711         expected_index = page->index + 1;
4712         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4713                 /* discontinuity? */
4714                 if (page->index != expected_index)
4715                         break;
4716
4717                 /* would this page push the read over the rsize? */
4718                 if (*bytes + PAGE_SIZE > rsize)
4719                         break;
4720
4721                 __SetPageLocked(page);
4722                 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4723                 if (rc) {
4724                         __ClearPageLocked(page);
4725                         break;
4726                 }
4727                 list_move_tail(&page->lru, tmplist);
4728                 (*bytes) += PAGE_SIZE;
4729                 expected_index++;
4730                 (*nr_pages)++;
4731         }
4732         return rc;
4733 }
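
/*
 * Worked example for readpages_get_pages() above (a sketch, not part of
 * this file): the batching rule in miniature. Starting from the first
 * index, pages join the request only while they stay contiguous and the
 * running byte count stays within rsize.
 */
#if 0	/* example only -- never compiled into this file */
#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 4096

int main(void)
{
	unsigned long indexes[] = { 7, 8, 9, 12, 13 };	/* gap after 9 */
	unsigned int rsize = 65536, bytes = EXAMPLE_PAGE_SIZE, nr = 1;
	unsigned long expected = indexes[0] + 1;
	unsigned int i;

	for (i = 1; i < 5; i++) {
		if (indexes[i] != expected)		/* discontinuity */
			break;
		if (bytes + EXAMPLE_PAGE_SIZE > rsize)	/* over rsize */
			break;
		bytes += EXAMPLE_PAGE_SIZE;
		nr++;
		expected++;
	}
	/* prints: 3 pages, 12288 bytes (pages 12 and 13 start a new batch) */
	printf("%u pages, %u bytes\n", nr, bytes);
	return 0;
}
#endif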
4734
4735 static int cifs_readpages(struct file *file, struct address_space *mapping,
4736         struct list_head *page_list, unsigned num_pages)
4737 {
4738         int rc;
4739         int err = 0;
4740         struct list_head tmplist;
4741         struct cifsFileInfo *open_file = file->private_data;
4742         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4743         struct TCP_Server_Info *server;
4744         pid_t pid;
4745         unsigned int xid;
4746
4747         xid = get_xid();
4748         /*
4749          * Reads as many pages as possible from fscache. Returns -ENOBUFS
4750          * immediately if the cookie is negative.
4751          *
4752          * After this point, every page in the list might have PG_fscache set,
4753          * so we will need to clean that up on every page we don't use.
4754          */
4755         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4756                                          &num_pages);
4757         if (rc == 0) {
4758                 free_xid(xid);
4759                 return rc;
4760         }
4761
4762         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4763                 pid = open_file->pid;
4764         else
4765                 pid = current->tgid;
4766
4767         rc = 0;
4768         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4769
4770         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4771                  __func__, file, mapping, num_pages);
4772
4773         /*
4774          * Start with the page at end of list and move it to private
4775          * list. Do the same with any following pages until we hit
4776          * the rsize limit, hit an index discontinuity, or run out of
4777          * pages. Issue the async read and then start the loop again
4778          * until the list is empty.
4779          *
4780          * Note that list order is important. The page_list is in
4781          * the order of declining indexes. When we put the pages in
4782          * the rdata->pages, then we want them in increasing order.
4783          */
4784         while (!list_empty(page_list) && !err) {
4785                 unsigned int i, nr_pages, bytes, rsize;
4786                 loff_t offset;
4787                 struct page *page, *tpage;
4788                 struct cifs_readdata *rdata;
4789                 struct cifs_credits credits_on_stack;
4790                 struct cifs_credits *credits = &credits_on_stack;
4791
4792                 if (open_file->invalidHandle) {
4793                         rc = cifs_reopen_file(open_file, true);
4794                         if (rc == -EAGAIN)
4795                                 continue;
4796                         else if (rc)
4797                                 break;
4798                 }
4799
4800                 if (cifs_sb->ctx->rsize == 0)
4801                         cifs_sb->ctx->rsize =
4802                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4803                                                              cifs_sb->ctx);
4804
4805                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4806                                                    &rsize, credits);
4807                 if (rc)
4808                         break;
4809
4810                 /*
4811                  * Give up immediately if rsize is too small to read an entire
4812                  * page. The VFS will fall back to readpage. We should never
4813                  * reach this point however since we set ra_pages to 0 when the
4814                  * rsize is smaller than a cache page.
4815                  */
4816                 if (unlikely(rsize < PAGE_SIZE)) {
4817                         add_credits_and_wake_if(server, credits, 0);
4818                         free_xid(xid);
4819                         return 0;
4820                 }
4821
4822                 nr_pages = 0;
4823                 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4824                                          &nr_pages, &offset, &bytes);
4825                 if (!nr_pages) {
4826                         add_credits_and_wake_if(server, credits, 0);
4827                         break;
4828                 }
4829
4830                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4831                 if (!rdata) {
4832                         /* best to give up if we're out of mem */
4833                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4834                                 list_del(&page->lru);
4835                                 lru_cache_add(page);
4836                                 unlock_page(page);
4837                                 put_page(page);
4838                         }
4839                         rc = -ENOMEM;
4840                         add_credits_and_wake_if(server, credits, 0);
4841                         break;
4842                 }
4843
4844                 rdata->cfile = cifsFileInfo_get(open_file);
4845                 rdata->server = server;
4846                 rdata->mapping = mapping;
4847                 rdata->offset = offset;
4848                 rdata->bytes = bytes;
4849                 rdata->pid = pid;
4850                 rdata->pagesz = PAGE_SIZE;
4851                 rdata->tailsz = PAGE_SIZE;
4852                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4853                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4854                 rdata->credits = credits_on_stack;
4855
4856                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4857                         list_del(&page->lru);
4858                         rdata->pages[rdata->nr_pages++] = page;
4859                 }
4860
4861                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4862
4863                 if (!rc) {
4864                         if (rdata->cfile->invalidHandle)
4865                                 rc = -EAGAIN;
4866                         else
4867                                 rc = server->ops->async_readv(rdata);
4868                 }
4869
4870                 if (rc) {
4871                         add_credits_and_wake_if(server, &rdata->credits, 0);
4872                         for (i = 0; i < rdata->nr_pages; i++) {
4873                                 page = rdata->pages[i];
4874                                 lru_cache_add(page);
4875                                 unlock_page(page);
4876                                 put_page(page);
4877                         }
4878                         /* Fallback to the readpage in error/reconnect cases */
4879                         kref_put(&rdata->refcount, cifs_readdata_release);
4880                         break;
4881                 }
4882
4883                 kref_put(&rdata->refcount, cifs_readdata_release);
4884         }
4885
4886         /* Any pages that have been shown to fscache but didn't get added to
4887          * the pagecache must be uncached before they get returned to the
4888          * allocator.
4889          */
4890         cifs_fscache_readpages_cancel(mapping->host, page_list);
4891         free_xid(xid);
4892         return rc;
4893 }
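
/*
 * Illustrative, user-space-only sketch (not part of this file): driving
 * the readahead that ends up in cifs_readpages() above on kernels of this
 * vintage. posix_fadvise(POSIX_FADV_WILLNEED), like a plain sequential
 * read, asks the page cache to populate pages asynchronously. The path
 * and the 1 MiB window are assumptions.
 */
#if 0	/* example only -- never compiled into this file */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int err;
	int fd = open("/mnt/cifs/bigfile", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* hint that the first 1 MiB will be needed soon */
	err = posix_fadvise(fd, 0, 1 << 20, POSIX_FADV_WILLNEED);
	if (err)	/* returns an errno value, not -1 */
		fprintf(stderr, "posix_fadvise: %d\n", err);
	close(fd);
	return 0;
}
#endif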
4894
4895 /*
4896  * cifs_readpage_worker must be called with the page pinned
4897  */
4898 static int cifs_readpage_worker(struct file *file, struct page *page,
4899         loff_t *poffset)
4900 {
4901         char *read_data;
4902         int rc;
4903
4904         /* Is the page cached? */
4905         rc = cifs_readpage_from_fscache(file_inode(file), page);
4906         if (rc == 0)
4907                 goto read_complete;
4908
4909         read_data = kmap(page);
4910         /* for reads over a certain size we could initiate async read ahead */
4911
4912         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4913
4914         if (rc < 0)
4915                 goto io_error;
4916         else
4917                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4918
4919         /* we do not want atime to be less than mtime, as that broke some apps */
4920         file_inode(file)->i_atime = current_time(file_inode(file));
4921         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)) < 0)
4922                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4923         else
4924                 file_inode(file)->i_atime = current_time(file_inode(file));
4925
4926         if (PAGE_SIZE > rc)
4927                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4928
4929         flush_dcache_page(page);
4930         SetPageUptodate(page);
4931
4932         /* send this page to the cache */
4933         cifs_readpage_to_fscache(file_inode(file), page);
4934
4935         rc = 0;
4936
4937 io_error:
4938         kunmap(page);
4939
4940 read_complete:
4941         unlock_page(page);
4942         return rc;
4943 }
4944
4945 static int cifs_readpage(struct file *file, struct page *page)
4946 {
4947         loff_t offset = page_file_offset(page);
4948         int rc = -EACCES;
4949         unsigned int xid;
4950
4951         xid = get_xid();
4952
4953         if (file->private_data == NULL) {
4954                 rc = -EBADF;
4955                 free_xid(xid);
4956                 return rc;
4957         }
4958
4959         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4960                  page, (int)offset, (int)offset);
4961
4962         rc = cifs_readpage_worker(file, page, &offset);
4963
4964         free_xid(xid);
4965         return rc;
4966 }
4967
4968 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4969 {
4970         struct cifsFileInfo *open_file;
4971
4972         spin_lock(&cifs_inode->open_file_lock);
4973         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4974                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4975                         spin_unlock(&cifs_inode->open_file_lock);
4976                         return 1;
4977                 }
4978         }
4979         spin_unlock(&cifs_inode->open_file_lock);
4980         return 0;
4981 }
4982
4983 /* We do not want to update the file size from the server for inodes
4984    open for write, to avoid races with writepage extending the file.
4985    In the future we could consider allowing a refresh of the inode
4986    only on increases in the file size, but this is tricky to do
4987    without racing with writebehind page caching in the current
4988    Linux kernel design */
4989 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4990 {
4991         if (!cifsInode)
4992                 return true;
4993
4994         if (is_inode_writable(cifsInode)) {
4995                 /* This inode is open for write at least once */
4996                 struct cifs_sb_info *cifs_sb;
4997
4998                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4999                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
5000                         /* since there is no page cache to corrupt on
5001                            directio, we can change the size safely */
5002                         return true;
5003                 }
5004
5005                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
5006                         return true;
5007
5008                 return false;
5009         } else
5010                 return true;
5011 }
5012
5013 static int cifs_write_begin(struct file *file, struct address_space *mapping,
5014                         loff_t pos, unsigned len, unsigned flags,
5015                         struct page **pagep, void **fsdata)
5016 {
5017         int oncethru = 0;
5018         pgoff_t index = pos >> PAGE_SHIFT;
5019         loff_t offset = pos & (PAGE_SIZE - 1);
5020         loff_t page_start = pos & PAGE_MASK;
5021         loff_t i_size;
5022         struct page *page;
5023         int rc = 0;
5024
5025         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
5026
5027 start:
5028         page = grab_cache_page_write_begin(mapping, index, flags);
5029         if (!page) {
5030                 rc = -ENOMEM;
5031                 goto out;
5032         }
5033
5034         if (PageUptodate(page))
5035                 goto out;
5036
5037         /*
5038          * If we write a full page it will be up to date, no need to read from
5039          * the server. If the write is short, we'll end up doing a sync write
5040          * instead.
5041          */
5042         if (len == PAGE_SIZE)
5043                 goto out;
5044
5045         /*
5046          * optimize away the read when we have an oplock, and we're not
5047          * expecting to use any of the data we'd be reading in. That
5048          * is, when the page lies beyond the EOF, or straddles the EOF
5049          * and the write will cover all of the existing data.
5050          */
5051         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
5052                 i_size = i_size_read(mapping->host);
5053                 if (page_start >= i_size ||
5054                     (offset == 0 && (pos + len) >= i_size)) {
5055                         zero_user_segments(page, 0, offset,
5056                                            offset + len,
5057                                            PAGE_SIZE);
5058                         /*
5059                          * PageChecked means that the parts of the page
5060                          * to which we're not writing are considered up
5061                          * to date. Once the data is copied to the
5062                          * page, it can be set uptodate.
5063                          */
5064                         SetPageChecked(page);
5065                         goto out;
5066                 }
5067         }
5068
5069         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
5070                 /*
5071                  * might as well read a page, it is fast enough. If we get
5072                  * an error, we don't need to return it. cifs_write_end will
5073                  * do a sync write instead since PG_uptodate isn't set.
5074                  */
5075                 cifs_readpage_worker(file, page, &page_start);
5076                 put_page(page);
5077                 oncethru = 1;
5078                 goto start;
5079         } else {
5080                 /* we could try using another file handle if there is one -
5081                    but how would we lock it to prevent a close of that handle
5082                    racing with this read? In any case this will be written
5083                    out by write_end so it is fine */
5084         }
5085 out:
5086         *pagep = page;
5087         return rc;
5088 }
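
/*
 * Worked example for cifs_write_begin() above (a sketch, not part of this
 * file): when the pre-read can be skipped. A write covering a whole page
 * never needs the old contents; with a read oplock, neither does a write
 * to a page beyond EOF, or one starting at the page boundary and reaching
 * EOF. The helper name and 4096-byte page size are assumptions.
 */
#if 0	/* example only -- never compiled into this file */
#include <stdbool.h>
#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 4096

static bool read_needed(unsigned int offset, unsigned int len,
			long long page_start, long long i_size,
			bool have_read_oplock)
{
	if (len == EXAMPLE_PAGE_SIZE)
		return false;		/* full-page overwrite */
	if (have_read_oplock &&
	    (page_start >= i_size ||
	     (offset == 0 && page_start + len >= i_size)))
		return false;		/* page beyond or covering EOF */
	return true;			/* read-modify-write */
}

int main(void)
{
	/* 100 bytes at offset 512 of an 8192-byte file: pre-read needed */
	printf("%d\n", read_needed(512, 100, 0, 8192, true));
	return 0;
}
#endif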
5089
5090 static int cifs_release_page(struct page *page, gfp_t gfp)
5091 {
5092         if (PagePrivate(page))
5093                 return 0;
5094
5095         return cifs_fscache_release_page(page, gfp);
5096 }
5097
5098 static void cifs_invalidate_page(struct page *page, unsigned int offset,
5099                                  unsigned int length)
5100 {
5101         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
5102
5103         if (offset == 0 && length == PAGE_SIZE)
5104                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
5105 }
5106
5107 static int cifs_launder_page(struct page *page)
5108 {
5109         int rc = 0;
5110         loff_t range_start = page_offset(page);
5111         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
5112         struct writeback_control wbc = {
5113                 .sync_mode = WB_SYNC_ALL,
5114                 .nr_to_write = 0,
5115                 .range_start = range_start,
5116                 .range_end = range_end,
5117         };
5118
5119         cifs_dbg(FYI, "Launder page: %p\n", page);
5120
5121         if (clear_page_dirty_for_io(page))
5122                 rc = cifs_writepage_locked(page, &wbc);
5123
5124         cifs_fscache_invalidate_page(page, page->mapping->host);
5125         return rc;
5126 }
5127
5128 void cifs_oplock_break(struct work_struct *work)
5129 {
5130         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5131                                                   oplock_break);
5132         struct inode *inode = d_inode(cfile->dentry);
5133         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
5134         struct cifsInodeInfo *cinode = CIFS_I(inode);
5135         struct cifs_tcon *tcon;
5136         struct TCP_Server_Info *server;
5137         struct tcon_link *tlink;
5138         int rc = 0;
5139         bool purge_cache = false, oplock_break_cancelled;
5140         __u64 persistent_fid, volatile_fid;
5141         __u16 net_fid;
5142
5143         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5144                         TASK_UNINTERRUPTIBLE);
5145
5146         tlink = cifs_sb_tlink(cifs_sb);
5147         if (IS_ERR(tlink))
5148                 goto out;
5149         tcon = tlink_tcon(tlink);
5150         server = tcon->ses->server;
5151
5152         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5153                                       cfile->oplock_epoch, &purge_cache);
5154
5155         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5156                                                 cifs_has_mand_locks(cinode)) {
5157                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5158                          inode);
5159                 cinode->oplock = 0;
5160         }
5161
5162         if (inode && S_ISREG(inode->i_mode)) {
5163                 if (CIFS_CACHE_READ(cinode))
5164                         break_lease(inode, O_RDONLY);
5165                 else
5166                         break_lease(inode, O_WRONLY);
5167                 rc = filemap_fdatawrite(inode->i_mapping);
5168                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5169                         rc = filemap_fdatawait(inode->i_mapping);
5170                         mapping_set_error(inode->i_mapping, rc);
5171                         cifs_zap_mapping(inode);
5172                 }
5173                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5174                 if (CIFS_CACHE_WRITE(cinode))
5175                         goto oplock_break_ack;
5176         }
5177
5178         rc = cifs_push_locks(cfile);
5179         if (rc)
5180                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5181
5182 oplock_break_ack:
5183         /*
5184          * When an oplock break is received and there are no active
5185          * file handles, only cached ones, schedule the deferred close
5186          * immediately so that a new open will not use the cached handle.
5187          */
5188
5189         if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5190                 cifs_close_deferred_file(cinode);
5191
5192         persistent_fid = cfile->fid.persistent_fid;
5193         volatile_fid = cfile->fid.volatile_fid;
5194         net_fid = cfile->fid.netfid;
5195         oplock_break_cancelled = cfile->oplock_break_cancelled;
5196
5197         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5198         /*
5199          * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5200          * an acknowledgment to be sent when the file has already been closed.
5201          */
5202         spin_lock(&cinode->open_file_lock);
5203         /* check list empty since can race with kill_sb calling tree disconnect */
5204         if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5205                 spin_unlock(&cinode->open_file_lock);
5206                 rc = server->ops->oplock_response(tcon, persistent_fid,
5207                                                   volatile_fid, net_fid, cinode);
5208                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5209         } else
5210                 spin_unlock(&cinode->open_file_lock);
5211
5212         cifs_put_tlink(tlink);
5213 out:
5214         cifs_done_oplock_break(cinode);
5215 }
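
/*
 * Illustrative, user-space-only sketch (not part of this file): Linux
 * file leases are the local analogue of the oplocks broken above -- the
 * kernel recalls a lease via a signal when another opener conflicts, and
 * the holder must flush and release, much like cifs_oplock_break(). The
 * file path is an assumption.
 */
#if 0	/* example only -- never compiled into this file */
#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t lease_broken;

static void on_lease_break(int sig)
{
	lease_broken = 1;	/* another open conflicts with our lease */
}

int main(void)
{
	int fd = open("/tmp/leased", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	signal(SIGIO, on_lease_break);		/* default break signal */
	if (fcntl(fd, F_SETLEASE, F_RDLCK) < 0)
		perror("F_SETLEASE");
	while (!lease_broken)
		pause();
	fcntl(fd, F_SETLEASE, F_UNLCK);		/* acknowledge the break */
	close(fd);
	return 0;
}
#endif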
5216
5217 /*
5218  * The presence of cifs_direct_io() in the address space ops vector
5219  * allows open() with O_DIRECT, which would have failed otherwise.
5220  *
5221  * In the non-cached mode (mount with cache=none), we shunt off direct
5222  * read and write requests, so this method should never be called.
5223  *
5224  * Direct IO is not yet supported in the cached mode.
5225  */
5226 static ssize_t
5227 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5228 {
5229         /*
5230          * FIXME
5231          * Eventually need to support direct IO for non forcedirectio mounts
5232          */
5233         return -EINVAL;
5234 }
5235
5236 static int cifs_swap_activate(struct swap_info_struct *sis,
5237                               struct file *swap_file, sector_t *span)
5238 {
5239         struct cifsFileInfo *cfile = swap_file->private_data;
5240         struct inode *inode = swap_file->f_mapping->host;
5241         unsigned long blocks;
5242         long long isize;
5243
5244         cifs_dbg(FYI, "swap activate\n");
5245
5246         spin_lock(&inode->i_lock);
5247         blocks = inode->i_blocks;
5248         isize = inode->i_size;
5249         spin_unlock(&inode->i_lock);
5250         if (blocks*512 < isize) {
5251                 pr_warn("swap activate: swapfile has holes\n");
5252                 return -EINVAL;
5253         }
5254         *span = sis->pages;
5255
5256         pr_warn_once("Swap support over SMB3 is experimental\n");
5257
5258         /*
5259          * TODO: consider adding ACL (or documenting how) to prevent other
5260          * users (on this or other systems) from reading it
5261          */
5262
5263
5264         /* TODO: add sk_set_memalloc(inet) or similar */
5265
5266         if (cfile)
5267                 cfile->swapfile = true;
5268         /*
5269          * TODO: Since file already open, we can't open with DENY_ALL here
5270          * but we could add call to grab a byte range lock to prevent others
5271          * from reading or writing the file
5272          */
5273
5274         return 0;
5275 }
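
/*
 * Illustrative, user-space-only sketch (not part of this file): the same
 * "no holes" test cifs_swap_activate() above applies, done from user
 * space with stat(2). st_blocks counts 512-byte units, so an allocation
 * smaller than the file size implies holes. The path is an assumption.
 */
#if 0	/* example only -- never compiled into this file */
#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	struct stat st;

	if (stat("/mnt/cifs/swapfile", &st) < 0) {
		perror("stat");
		return 1;
	}
	if ((long long)st.st_blocks * 512 < st.st_size)
		printf("swapfile has holes\n");	/* swapon would fail */
	else
		printf("fully allocated\n");
	return 0;
}
#endif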
5276
5277 static void cifs_swap_deactivate(struct file *file)
5278 {
5279         struct cifsFileInfo *cfile = file->private_data;
5280
5281         cifs_dbg(FYI, "swap deactivate\n");
5282
5283         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5284
5285         if (cfile)
5286                 cfile->swapfile = false;
5287
5288         /* do we need to unpin (or unlock) the file? */
5289 }
5290
5291 const struct address_space_operations cifs_addr_ops = {
5292         .readpage = cifs_readpage,
5293         .readpages = cifs_readpages,
5294         .writepage = cifs_writepage,
5295         .writepages = cifs_writepages,
5296         .write_begin = cifs_write_begin,
5297         .write_end = cifs_write_end,
5298         .set_page_dirty = __set_page_dirty_nobuffers,
5299         .releasepage = cifs_release_page,
5300         .direct_IO = cifs_direct_io,
5301         .invalidatepage = cifs_invalidate_page,
5302         .launder_page = cifs_launder_page,
5303         /*
5304          * TODO: investigate and if useful we could add a cifs_migratePage
5305          * helper (under a CONFIG_MIGRATION check) in the future, and also
5306          * investigate and add an is_dirty_writeback helper if needed
5307          */
5308         .swap_activate = cifs_swap_activate,
5309         .swap_deactivate = cifs_swap_deactivate,
5310 };
5311
5312 /*
5313  * cifs_readpages requires the server to support a buffer large enough to
5314  * contain the header plus one complete page of data.  Otherwise, we need
5315  * to leave cifs_readpages out of the address space operations.
5316  */
5317 const struct address_space_operations cifs_addr_ops_smallbuf = {
5318         .readpage = cifs_readpage,
5319         .writepage = cifs_writepage,
5320         .writepages = cifs_writepages,
5321         .write_begin = cifs_write_begin,
5322         .write_end = cifs_write_end,
5323         .set_page_dirty = __set_page_dirty_nobuffers,
5324         .releasepage = cifs_release_page,
5325         .invalidatepage = cifs_invalidate_page,
5326         .launder_page = cifs_launder_page,
5327 };