cifs: Implement splice_read to pass down ITER_BVEC not ITER_PIPE
[sfrench/cifs-2.6.git] fs/cifs/file.c
// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Mark all open files on the tree connection as invalid, since they
 * were closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file = NULL;
        struct list_head *tmp;
        struct list_head *tmp1;

        /* only send once per connect */
        spin_lock(&tcon->ses->ses_lock);
        if ((tcon->ses->ses_status != SES_GOOD) || (tcon->status != TID_NEED_RECON)) {
                spin_unlock(&tcon->ses->ses_lock);
                return;
        }
        tcon->status = TID_IN_FILES_INVALIDATE;
        spin_unlock(&tcon->ses->ses_lock);

        /* list all files open on tree connection and mark them invalid */
        spin_lock(&tcon->open_file_lock);
        list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                open_file->invalidHandle = true;
                open_file->oplock_break_cancelled = true;
        }
        spin_unlock(&tcon->open_file_lock);

        invalidate_all_cached_dirs(tcon);
        spin_lock(&tcon->tc_lock);
        if (tcon->status == TID_IN_FILES_INVALIDATE)
                tcon->status = TID_NEED_TCON;
        spin_unlock(&tcon->tc_lock);

        /*
         * BB Add call to invalidate_inodes(sb) for all superblocks mounted
         * to this tcon.
         */
}

static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause an unnecessary access-denied error on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}
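
/*
 * Illustrative sketch (not part of the upstream file): how a few common
 * open(2) flag combinations map through cifs_convert_flags() and
 * cifs_get_disposition() above. Note that O_CREAT | O_EXCL is checked
 * first, so it wins over O_TRUNC:
 *
 *     O_RDONLY                      -> GENERIC_READ,                 FILE_OPEN
 *     O_WRONLY | O_CREAT            -> GENERIC_WRITE,                FILE_OPEN_IF
 *     O_RDWR | O_CREAT | O_TRUNC    -> GENERIC_READ | GENERIC_WRITE, FILE_OVERWRITE_IF
 *     O_RDWR | O_CREAT | O_EXCL     -> GENERIC_READ | GENERIC_WRITE, FILE_CREATE
 */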

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
                        struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
                        struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct POSIX match for the disposition
 *      FILE_SUPERSEDE (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing
 *      file rather than replacing it with a new file, as FILE_SUPERSEDE
 *      does (using the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag, and
 *      the read/write flags match reasonably. O_LARGEFILE is irrelevant
 *      because largefile support is always used by this client. Flags
 *      O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC, O_NOFOLLOW and
 *      O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = fid,
        };

        rc = server->ops->open(xid, &oparms, oplock, buf);
        if (rc)
                return rc;

        /* TODO: Add support for calling posix query info but with passing in fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

        return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                                       struct tcon_link *tlink, __u32 oplock,
                                       const char *symlink_target)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        if (symlink_target) {
                cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
                if (!cfile->symlink_target) {
                        kfree(fdlocks);
                        kfree(cfile);
                        return NULL;
                }
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if readable file instance put first in list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file->symlink_target);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}
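
/*
 * Illustrative usage sketch (not from the upstream file): callers that need
 * to keep a cifsFileInfo alive across an operation pair cifsFileInfo_get()
 * with cifsFileInfo_put(), as cifs_reopen_persistent_handles() below does
 * with its temporary list:
 *
 *     struct cifsFileInfo *cfile = file->private_data;
 *
 *     cifsFileInfo_get(cfile);       // take an extra reference
 *     do_something_with(cfile);      // hypothetical work on the handle
 *     cifsFileInfo_put(cfile);       // last put may close the handle
 */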

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_for_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:    if true, defer the final release to a workqueue; callers on
 *              the close and oplock break paths pass false
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        struct cifs_open_info_data data = {};

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto use_cache;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
                                  xid, &data);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
        fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
                           file->f_mode & FMODE_WRITE);
        if (file->f_flags & O_DIRECT &&
            (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
             file->f_flags & O_APPEND))
                cifs_invalidate_cache(file_inode(file),
                                      FSCACHE_INVAL_DIO_WRITE);

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        cifs_free_open_info(&data);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here: various ops, including some that
         * already hold it, can end up causing writepage to get called, and if
         * the server was down that means we end up here. We can never tell
         * whether the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * fallthrough to retry open the old way on errors, especially
                 * in the reconnect path it is important to retry hard
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = &cfile->fid,
                .reconnect = true,
        };

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work queues new
                                 * work. So, increase the ref count to avoid use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}
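
/*
 * Deferred close timeline, a rough sketch of the logic above (not part of
 * the upstream file). Assuming a granted RHW lease and the closetimeo value
 * from the mount context:
 *
 *     close(fd)                       -> cifs_close()
 *       handle kept open, work queued -> queue_delayed_work(deferredclose_wq,
 *                                            &cfile->deferred, closetimeo)
 *       reopen within closetimeo      -> cifs_open() reuses the cached handle
 *                                        via cifs_get_readable_path()
 *       timer expires                 -> smb2_deferred_work_close() drops the
 *                                        reference; last put closes on the wire
 */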

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file, *tmp;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each_entry(open_file, &tcon->openFileList, tlist) {
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP (no op), CIFS_READ_OP (read) or CIFS_WRITE_OP (write) */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
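
/*
 * Illustrative note (not from the upstream file): the first check in the
 * loop above is the standard half-open interval overlap test. Two byte
 * ranges [offset, offset + length) and [li->offset, li->offset + li->length)
 * overlap exactly when neither ends before the other begins, e.g.:
 *
 *     request [100, 200)  vs  existing [150, 250)  -> conflict candidate
 *     request [100, 150)  vs  existing [150, 250)  -> no overlap, skipped
 */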

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}
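
/*
 * Illustrative caller pattern for the return codes above (a sketch, not
 * upstream code; send_brlock_to_server() is a hypothetical stand-in for the
 * server-specific lock call):
 *
 *     struct cifsLockInfo *lock = cifs_lock_init(off, len, type, flags);
 *
 *     rc = cifs_lock_add_if(cfile, lock, wait);   // 0: cached locally
 *     if (rc == 1) {                              // 1: must ask the server
 *             rc = send_brlock_to_server(cfile, lock);    // hypothetical
 *             if (!rc)
 *                     cifs_lock_add(cfile, lock);
 *     }
 *     if (rc)
 *             kfree(lock);                        // -EACCES or server error
 */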

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};
1411
1412 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
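/*
 * Push all cached POSIX byte-range locks on this file out to the server.
 * This works in two passes: first count the locks and preallocate
 * lock_to_push entries with GFP_KERNEL, then copy the lock details while
 * holding flc_lock, under which sleeping allocations are not allowed.
 */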
1413 static int
1414 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1415 {
1416         struct inode *inode = d_inode(cfile->dentry);
1417         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1418         struct file_lock *flock;
1419         struct file_lock_context *flctx = locks_inode_context(inode);
1420         unsigned int count = 0, i;
1421         int rc = 0, xid, type;
1422         struct list_head locks_to_send, *el;
1423         struct lock_to_push *lck, *tmp;
1424         __u64 length;
1425
1426         xid = get_xid();
1427
1428         if (!flctx)
1429                 goto out;
1430
1431         spin_lock(&flctx->flc_lock);
1432         list_for_each(el, &flctx->flc_posix) {
1433                 count++;
1434         }
1435         spin_unlock(&flctx->flc_lock);
1436
1437         INIT_LIST_HEAD(&locks_to_send);
1438
1439         /*
1440          * Allocating count locks is enough because no FL_POSIX locks can be
1441          * added to the list while we are holding cinode->lock_sem that
1442          * protects locking operations of this inode.
1443          */
1444         for (i = 0; i < count; i++) {
1445                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1446                 if (!lck) {
1447                         rc = -ENOMEM;
1448                         goto err_out;
1449                 }
1450                 list_add_tail(&lck->llist, &locks_to_send);
1451         }
1452
1453         el = locks_to_send.next;
1454         spin_lock(&flctx->flc_lock);
1455         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1456                 if (el == &locks_to_send) {
1457                         /*
1458                          * The list ended. We don't have enough allocated
1459                          * structures - something is really wrong.
1460                          */
1461                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1462                         break;
1463                 }
1464                 length = cifs_flock_len(flock);
1465                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1466                         type = CIFS_RDLCK;
1467                 else
1468                         type = CIFS_WRLCK;
1469                 lck = list_entry(el, struct lock_to_push, llist);
1470                 lck->pid = hash_lockowner(flock->fl_owner);
1471                 lck->netfid = cfile->fid.netfid;
1472                 lck->length = length;
1473                 lck->type = type;
1474                 lck->offset = flock->fl_start;
1475         }
1476         spin_unlock(&flctx->flc_lock);
1477
1478         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1479                 int stored_rc;
1480
1481                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1482                                              lck->offset, lck->length, NULL,
1483                                              lck->type, 0);
1484                 if (stored_rc)
1485                         rc = stored_rc;
1486                 list_del(&lck->llist);
1487                 kfree(lck);
1488         }
1489
1490 out:
1491         free_xid(xid);
1492         return rc;
1493 err_out:
1494         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1495                 list_del(&lck->llist);
1496                 kfree(lck);
1497         }
1498         goto out;
1499 }
1500 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1501
1502 static int
1503 cifs_push_locks(struct cifsFileInfo *cfile)
1504 {
1505         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1506         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1507         int rc = 0;
1508 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1509         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1510 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1511
1512         /* we are going to update can_cache_brlcks here - need write access */
1513         cifs_down_write(&cinode->lock_sem);
1514         if (!cinode->can_cache_brlcks) {
1515                 up_write(&cinode->lock_sem);
1516                 return rc;
1517         }
1518
1519 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1520         if (cap_unix(tcon->ses) &&
1521             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1522             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1523                 rc = cifs_push_posix_locks(cfile);
1524         else
1525 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1526                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1527
1528         cinode->can_cache_brlcks = false;
1529         up_write(&cinode->lock_sem);
1530         return rc;
1531 }
1532
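/*
 * Decode a VFS file_lock for the transport: log the flock flags, note
 * whether the caller is willing to block (FL_SLEEP), and translate fl_type
 * into the server's lock-type bits plus lock/unlock intent for the caller.
 */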
1533 static void
1534 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1535                 bool *wait_flag, struct TCP_Server_Info *server)
1536 {
1537         if (flock->fl_flags & FL_POSIX)
1538                 cifs_dbg(FYI, "Posix\n");
1539         if (flock->fl_flags & FL_FLOCK)
1540                 cifs_dbg(FYI, "Flock\n");
1541         if (flock->fl_flags & FL_SLEEP) {
1542                 cifs_dbg(FYI, "Blocking lock\n");
1543                 *wait_flag = true;
1544         }
1545         if (flock->fl_flags & FL_ACCESS)
1546                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1547         if (flock->fl_flags & FL_LEASE)
1548                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1549         if (flock->fl_flags &
1550             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1551                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1552                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1553
1554         *type = server->vals->large_lock_type;
1555         if (flock->fl_type == F_WRLCK) {
1556                 cifs_dbg(FYI, "F_WRLCK\n");
1557                 *type |= server->vals->exclusive_lock_type;
1558                 *lock = 1;
1559         } else if (flock->fl_type == F_UNLCK) {
1560                 cifs_dbg(FYI, "F_UNLCK\n");
1561                 *type |= server->vals->unlock_lock_type;
1562                 *unlock = 1;
1563                 /* Check if unlock includes more than one lock range */
1564         } else if (flock->fl_type == F_RDLCK) {
1565                 cifs_dbg(FYI, "F_RDLCK\n");
1566                 *type |= server->vals->shared_lock_type;
1567                 *lock = 1;
1568         } else if (flock->fl_type == F_EXLCK) {
1569                 cifs_dbg(FYI, "F_EXLCK\n");
1570                 *type |= server->vals->exclusive_lock_type;
1571                 *lock = 1;
1572         } else if (flock->fl_type == F_SHLCK) {
1573                 cifs_dbg(FYI, "F_SHLCK\n");
1574                 *type |= server->vals->shared_lock_type;
1575                 *lock = 1;
1576         } else
1577                 cifs_dbg(FYI, "Unknown type of lock\n");
1578 }
1579
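/*
 * Handle F_GETLK.  There is no way to query a mandatory lock over the
 * wire, so the range is probed with a transient lock of the requested
 * type: if it can be taken (and is then released), report F_UNLCK.
 * Otherwise a shared probe distinguishes a read-locked range (F_RDLCK)
 * from a write-locked one (F_WRLCK).
 */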
1580 static int
1581 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1582            bool wait_flag, bool posix_lck, unsigned int xid)
1583 {
1584         int rc = 0;
1585         __u64 length = cifs_flock_len(flock);
1586         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1587         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1588         struct TCP_Server_Info *server = tcon->ses->server;
1589 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1590         __u16 netfid = cfile->fid.netfid;
1591
1592         if (posix_lck) {
1593                 int posix_lock_type;
1594
1595                 rc = cifs_posix_lock_test(file, flock);
1596                 if (!rc)
1597                         return rc;
1598
1599                 if (type & server->vals->shared_lock_type)
1600                         posix_lock_type = CIFS_RDLCK;
1601                 else
1602                         posix_lock_type = CIFS_WRLCK;
1603                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1604                                       hash_lockowner(flock->fl_owner),
1605                                       flock->fl_start, length, flock,
1606                                       posix_lock_type, wait_flag);
1607                 return rc;
1608         }
1609 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1610
1611         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1612         if (!rc)
1613                 return rc;
1614
1615         /* BB we could chain these into one lock request BB */
1616         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1617                                     1, 0, false);
1618         if (rc == 0) {
1619                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1620                                             type, 0, 1, false);
1621                 flock->fl_type = F_UNLCK;
1622                 if (rc != 0)
1623                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1624                                  rc);
1625                 return 0;
1626         }
1627
1628         if (type & server->vals->shared_lock_type) {
1629                 flock->fl_type = F_WRLCK;
1630                 return 0;
1631         }
1632
1633         type &= ~server->vals->exclusive_lock_type;
1634
1635         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1636                                     type | server->vals->shared_lock_type,
1637                                     1, 0, false);
1638         if (rc == 0) {
1639                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1640                         type | server->vals->shared_lock_type, 0, 1, false);
1641                 flock->fl_type = F_RDLCK;
1642                 if (rc != 0)
1643                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1644                                  rc);
1645         } else
1646                 flock->fl_type = F_WRLCK;
1647
1648         return 0;
1649 }
1650
1651 void
1652 cifs_move_llist(struct list_head *source, struct list_head *dest)
1653 {
1654         struct list_head *li, *tmp;
1655         list_for_each_safe(li, tmp, source)
1656                 list_move(li, dest);
1657 }
1658
1659 void
1660 cifs_free_llist(struct list_head *llist)
1661 {
1662         struct cifsLockInfo *li, *tmp;
1663         list_for_each_entry_safe(li, tmp, llist, llist) {
1664                 cifs_del_lock_waiters(li);
1665                 list_del(&li->llist);
1666                 kfree(li);
1667         }
1668 }
1669
1670 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
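/*
 * Remove all cached locks that fall within the unlock range and, when
 * brlocks can no longer be cached, send the matching LOCKING_ANDX unlock
 * requests, batching up to max_num ranges per SMB.  Removed entries are
 * parked on tmp_llist so they can be restored if the server rejects the
 * unlock.
 */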
1671 int
1672 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1673                   unsigned int xid)
1674 {
1675         int rc = 0, stored_rc;
1676         static const int types[] = {
1677                 LOCKING_ANDX_LARGE_FILES,
1678                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1679         };
1680         unsigned int i;
1681         unsigned int max_num, num, max_buf;
1682         LOCKING_ANDX_RANGE *buf, *cur;
1683         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1684         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1685         struct cifsLockInfo *li, *tmp;
1686         __u64 length = cifs_flock_len(flock);
1687         struct list_head tmp_llist;
1688
1689         INIT_LIST_HEAD(&tmp_llist);
1690
1691         /*
1692          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1693          * and check it before using.
1694          */
1695         max_buf = tcon->ses->server->maxBuf;
1696         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1697                 return -EINVAL;
1698
1699         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1700                      PAGE_SIZE);
1701         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1702                         PAGE_SIZE);
1703         max_num = (max_buf - sizeof(struct smb_hdr)) /
1704                                                 sizeof(LOCKING_ANDX_RANGE);
1705         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1706         if (!buf)
1707                 return -ENOMEM;
1708
1709         cifs_down_write(&cinode->lock_sem);
1710         for (i = 0; i < 2; i++) {
1711                 cur = buf;
1712                 num = 0;
1713                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1714                         if (flock->fl_start > li->offset ||
1715                             (flock->fl_start + length) <
1716                             (li->offset + li->length))
1717                                 continue;
1718                         if (current->tgid != li->pid)
1719                                 continue;
1720                         if (types[i] != li->type)
1721                                 continue;
1722                         if (cinode->can_cache_brlcks) {
1723                                 /*
1724                                  * We can cache brlock requests - simply remove
1725                                  * a lock from the file's list.
1726                                  */
1727                                 list_del(&li->llist);
1728                                 cifs_del_lock_waiters(li);
1729                                 kfree(li);
1730                                 continue;
1731                         }
1732                         cur->Pid = cpu_to_le16(li->pid);
1733                         cur->LengthLow = cpu_to_le32((u32)li->length);
1734                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1735                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1736                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1737                         /*
1738                          * Save the lock so that it can be re-added to the
1739                          * file's list if the unlock range request fails on
1740                          * the server.
1741                          */
1742                         list_move(&li->llist, &tmp_llist);
1743                         if (++num == max_num) {
1744                                 stored_rc = cifs_lockv(xid, tcon,
1745                                                        cfile->fid.netfid,
1746                                                        li->type, num, 0, buf);
1747                                 if (stored_rc) {
1748                                         /*
1749                                          * We failed on the unlock range
1750                                          * request - add all locks from the tmp
1751                                          * list to the head of the file's list.
1752                                          */
1753                                         cifs_move_llist(&tmp_llist,
1754                                                         &cfile->llist->locks);
1755                                         rc = stored_rc;
1756                                 } else
1757                                         /*
1758                                          * The unlock range request succeeded -
1759                                          * free the tmp list.
1760                                          */
1761                                         cifs_free_llist(&tmp_llist);
1762                                 cur = buf;
1763                                 num = 0;
1764                         } else
1765                                 cur++;
1766                 }
1767                 if (num) {
1768                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1769                                                types[i], num, 0, buf);
1770                         if (stored_rc) {
1771                                 cifs_move_llist(&tmp_llist,
1772                                                 &cfile->llist->locks);
1773                                 rc = stored_rc;
1774                         } else
1775                                 cifs_free_llist(&tmp_llist);
1776                 }
1777         }
1778
1779         up_write(&cinode->lock_sem);
1780         kfree(buf);
1781         return rc;
1782 }
1783 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1784
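/*
 * Handle F_SETLK/F_SETLKW (and flock): take the POSIX path on mounts with
 * unix extensions, otherwise try to cache the byte-range lock locally and
 * fall back to sending a mandatory lock to the server when caching is not
 * possible.
 */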
1785 static int
1786 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1787            bool wait_flag, bool posix_lck, int lock, int unlock,
1788            unsigned int xid)
1789 {
1790         int rc = 0;
1791         __u64 length = cifs_flock_len(flock);
1792         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1793         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1794         struct TCP_Server_Info *server = tcon->ses->server;
1795         struct inode *inode = d_inode(cfile->dentry);
1796
1797 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1798         if (posix_lck) {
1799                 int posix_lock_type;
1800
1801                 rc = cifs_posix_lock_set(file, flock);
1802                 if (rc <= FILE_LOCK_DEFERRED)
1803                         return rc;
1804
1805                 if (type & server->vals->shared_lock_type)
1806                         posix_lock_type = CIFS_RDLCK;
1807                 else
1808                         posix_lock_type = CIFS_WRLCK;
1809
1810                 if (unlock == 1)
1811                         posix_lock_type = CIFS_UNLCK;
1812
1813                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1814                                       hash_lockowner(flock->fl_owner),
1815                                       flock->fl_start, length,
1816                                       NULL, posix_lock_type, wait_flag);
1817                 goto out;
1818         }
1819 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1820         if (lock) {
1821                 struct cifsLockInfo *lock;
1822
1823                 lock = cifs_lock_init(flock->fl_start, length, type,
1824                                       flock->fl_flags);
1825                 if (!lock)
1826                         return -ENOMEM;
1827
1828                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1829                 if (rc < 0) {
1830                         kfree(lock);
1831                         return rc;
1832                 }
1833                 if (!rc)
1834                         goto out;
1835
1836                 /*
1837                  * Windows 7 server can delay breaking lease from read to None
1838                  * if we set a byte-range lock on a file - break it explicitly
1839                  * before sending the lock to the server to be sure the next
1840                  * read won't conflict with non-overlapped locks due to
1841                  * page readahead.
1842                  */
1843                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1844                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1845                         cifs_zap_mapping(inode);
1846                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1847                                  inode);
1848                         CIFS_I(inode)->oplock = 0;
1849                 }
1850
1851                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1852                                             type, 1, 0, wait_flag);
1853                 if (rc) {
1854                         kfree(lock);
1855                         return rc;
1856                 }
1857
1858                 cifs_lock_add(cfile, lock);
1859         } else if (unlock)
1860                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1861
1862 out:
1863         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1864                 /*
1865                  * If this is a request to remove all locks because we
1866                  * are closing the file, it doesn't matter if the
1867                  * unlocking failed as both cifs.ko and the SMB server
1868                  * remove the lock on file close
1869                  */
1870                 if (rc) {
1871                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1872                         if (!(flock->fl_flags & FL_CLOSE))
1873                                 return rc;
1874                 }
1875                 rc = locks_lock_file_wait(file, flock);
1876         }
1877         return rc;
1878 }
1879
1880 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1881 {
1882         int rc, xid;
1883         int lock = 0, unlock = 0;
1884         bool wait_flag = false;
1885         bool posix_lck = false;
1886         struct cifs_sb_info *cifs_sb;
1887         struct cifs_tcon *tcon;
1888         struct cifsFileInfo *cfile;
1889         __u32 type;
1890
1891         xid = get_xid();
1892
1893         if (!(fl->fl_flags & FL_FLOCK)) {
1894                 rc = -ENOLCK;
1895                 free_xid(xid);
1896                 return rc;
1897         }
1898
1899         cfile = (struct cifsFileInfo *)file->private_data;
1900         tcon = tlink_tcon(cfile->tlink);
1901
1902         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1903                         tcon->ses->server);
1904         cifs_sb = CIFS_FILE_SB(file);
1905
1906         if (cap_unix(tcon->ses) &&
1907             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1908             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1909                 posix_lck = true;
1910
1911         if (!lock && !unlock) {
1912                 /*
1913                  * Neither a lock nor an unlock was requested - nothing
1914                  * we know how to do with this request.
1915                  */
1916                 rc = -EOPNOTSUPP;
1917                 free_xid(xid);
1918                 return rc;
1919         }
1920
1921         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1922                         xid);
1923         free_xid(xid);
1924         return rc;
1927 }
1928
1929 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1930 {
1931         int rc, xid;
1932         int lock = 0, unlock = 0;
1933         bool wait_flag = false;
1934         bool posix_lck = false;
1935         struct cifs_sb_info *cifs_sb;
1936         struct cifs_tcon *tcon;
1937         struct cifsFileInfo *cfile;
1938         __u32 type;
1939
1940         rc = -EACCES;
1941         xid = get_xid();
1942
1943         cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
1944                  flock->fl_type, flock->fl_flags, (long long)flock->fl_start,
1945                  (long long)flock->fl_end);
1946
1947         cfile = (struct cifsFileInfo *)file->private_data;
1948         tcon = tlink_tcon(cfile->tlink);
1949
1950         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1951                         tcon->ses->server);
1952         cifs_sb = CIFS_FILE_SB(file);
1953         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1954
1955         if (cap_unix(tcon->ses) &&
1956             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1957             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1958                 posix_lck = true;
1959         /*
1960          * BB add code here to normalize offset and length to account for
1961          * negative length, which we cannot accept over the wire.
1962          */
1963         if (IS_GETLK(cmd)) {
1964                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1965                 free_xid(xid);
1966                 return rc;
1967         }
1968
1969         if (!lock && !unlock) {
1970                 /*
1971                  * Neither a lock nor an unlock was requested - nothing
1972                  * we know how to do with this request.
1973                  */
1974                 free_xid(xid);
1975                 return -EOPNOTSUPP;
1976         }
1977
1978         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1979                         xid);
1980         free_xid(xid);
1981         return rc;
1982 }
1983
1984 /*
1985  * update the file size (if needed) after a write. Should be called with
1986  * the inode->i_lock held
1987  */
1988 void
1989 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1990                       unsigned int bytes_written)
1991 {
1992         loff_t end_of_write = offset + bytes_written;
1993
1994         if (end_of_write > cifsi->server_eof)
1995                 cifsi->server_eof = end_of_write;
1996 }
1997
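/*
 * Synchronous write helper: loop until write_size bytes have been sent,
 * capping each request at the server's wp_retry_size and transparently
 * reopening an invalidated handle on -EAGAIN.  The cached EOF and inode
 * size are advanced under i_lock as data goes out.
 */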
1998 static ssize_t
1999 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2000            size_t write_size, loff_t *offset)
2001 {
2002         int rc = 0;
2003         unsigned int bytes_written = 0;
2004         unsigned int total_written;
2005         struct cifs_tcon *tcon;
2006         struct TCP_Server_Info *server;
2007         unsigned int xid;
2008         struct dentry *dentry = open_file->dentry;
2009         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2010         struct cifs_io_parms io_parms = {0};
2011
2012         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2013                  write_size, *offset, dentry);
2014
2015         tcon = tlink_tcon(open_file->tlink);
2016         server = tcon->ses->server;
2017
2018         if (!server->ops->sync_write)
2019                 return -ENOSYS;
2020
2021         xid = get_xid();
2022
2023         for (total_written = 0; write_size > total_written;
2024              total_written += bytes_written) {
2025                 rc = -EAGAIN;
2026                 while (rc == -EAGAIN) {
2027                         struct kvec iov[2];
2028                         unsigned int len;
2029
2030                         if (open_file->invalidHandle) {
2031                                 /* we could deadlock if we called
2032                                    filemap_fdatawait from here, so tell
2033                                    reopen_file not to flush data to the
2034                                    server now */
2035                                 rc = cifs_reopen_file(open_file, false);
2036                                 if (rc != 0)
2037                                         break;
2038                         }
2039
2040                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2041                                   (unsigned int)write_size - total_written);
2042                         /* iov[0] is reserved for smb header */
2043                         iov[1].iov_base = (char *)write_data + total_written;
2044                         iov[1].iov_len = len;
2045                         io_parms.pid = pid;
2046                         io_parms.tcon = tcon;
2047                         io_parms.offset = *offset;
2048                         io_parms.length = len;
2049                         rc = server->ops->sync_write(xid, &open_file->fid,
2050                                         &io_parms, &bytes_written, iov, 1);
2051                 }
2052                 if (rc || (bytes_written == 0)) {
2053                         if (total_written)
2054                                 break;
2055                         else {
2056                                 free_xid(xid);
2057                                 return rc;
2058                         }
2059                 } else {
2060                         spin_lock(&d_inode(dentry)->i_lock);
2061                         cifs_update_eof(cifsi, *offset, bytes_written);
2062                         spin_unlock(&d_inode(dentry)->i_lock);
2063                         *offset += bytes_written;
2064                 }
2065         }
2066
2067         cifs_stats_bytes_written(tcon, total_written);
2068
2069         if (total_written > 0) {
2070                 spin_lock(&d_inode(dentry)->i_lock);
2071                 if (*offset > d_inode(dentry)->i_size) {
2072                         i_size_write(d_inode(dentry), *offset);
2073                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2074                 }
2075                 spin_unlock(&d_inode(dentry)->i_lock);
2076         }
2077         mark_inode_dirty_sync(d_inode(dentry));
2078         free_xid(xid);
2079         return total_written;
2080 }
2081
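/*
 * Find an open handle on this inode that can be used for reading, taking a
 * reference so it cannot be closed underneath the caller.  Returns NULL if
 * no usable handle exists.
 */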
2082 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2083                                         bool fsuid_only)
2084 {
2085         struct cifsFileInfo *open_file = NULL;
2086         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2087
2088         /* only filter by fsuid on multiuser mounts */
2089         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2090                 fsuid_only = false;
2091
2092         spin_lock(&cifs_inode->open_file_lock);
2093         /* We could simply return the first list entry, since write-only
2094            entries are always at the end of the list; but the first entry
2095            might have a close pending, so go through the whole list. */
2096         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2097                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2098                         continue;
2099                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2100                         if (!open_file->invalidHandle) {
2101                                 /* found a good file */
2102                                 /* lock it so it will not be closed on us */
2103                                 cifsFileInfo_get(open_file);
2104                                 spin_unlock(&cifs_inode->open_file_lock);
2105                                 return open_file;
2106                         } /* else might as well continue, and look for
2107                              another, or simply have the caller reopen it
2108                              again rather than trying to fix this handle */
2109                 } else /* write only file */
2110                         break; /* write only files are last so must be done */
2111         }
2112         spin_unlock(&cifs_inode->open_file_lock);
2113         return NULL;
2114 }
2115
2116 /* Return -EBADF if no handle is found and general rc otherwise */
2117 int
2118 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2119                        struct cifsFileInfo **ret_file)
2120 {
2121         struct cifsFileInfo *open_file, *inv_file = NULL;
2122         struct cifs_sb_info *cifs_sb;
2123         bool any_available = false;
2124         int rc = -EBADF;
2125         unsigned int refind = 0;
2126         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2127         bool with_delete = flags & FIND_WR_WITH_DELETE;
2128         *ret_file = NULL;
2129
2130         /*
2131          * Having a null inode here (because mapping->host was set to zero by
2132          * the VFS or MM) should not happen, but we had reports of an oops
2133          * (due to it being zero) during stress test cases, so check for it
2134          */
2135
2136         if (cifs_inode == NULL) {
2137                 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2138                 dump_stack();
2139                 return rc;
2140         }
2141
2142         cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2143
2144         /* only filter by fsuid on multiuser mounts */
2145         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2146                 fsuid_only = false;
2147
2148         spin_lock(&cifs_inode->open_file_lock);
2149 refind_writable:
2150         if (refind > MAX_REOPEN_ATT) {
2151                 spin_unlock(&cifs_inode->open_file_lock);
2152                 return rc;
2153         }
2154         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2155                 if (!any_available && open_file->pid != current->tgid)
2156                         continue;
2157                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2158                         continue;
2159                 if (with_delete && !(open_file->fid.access & DELETE))
2160                         continue;
2161                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2162                         if (!open_file->invalidHandle) {
2163                                 /* found a good writable file */
2164                                 cifsFileInfo_get(open_file);
2165                                 spin_unlock(&cifs_inode->open_file_lock);
2166                                 *ret_file = open_file;
2167                                 return 0;
2168                         } else {
2169                                 if (!inv_file)
2170                                         inv_file = open_file;
2171                         }
2172                 }
2173         }
2174         /* couldn't find usable FH with same pid, try any available */
2175         if (!any_available) {
2176                 any_available = true;
2177                 goto refind_writable;
2178         }
2179
2180         if (inv_file) {
2181                 any_available = false;
2182                 cifsFileInfo_get(inv_file);
2183         }
2184
2185         spin_unlock(&cifs_inode->open_file_lock);
2186
2187         if (inv_file) {
2188                 rc = cifs_reopen_file(inv_file, false);
2189                 if (!rc) {
2190                         *ret_file = inv_file;
2191                         return 0;
2192                 }
2193
2194                 spin_lock(&cifs_inode->open_file_lock);
2195                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2196                 spin_unlock(&cifs_inode->open_file_lock);
2197                 cifsFileInfo_put(inv_file);
2198                 ++refind;
2199                 inv_file = NULL;
2200                 spin_lock(&cifs_inode->open_file_lock);
2201                 goto refind_writable;
2202         }
2203
2204         return rc;
2205 }
2206
2207 struct cifsFileInfo *
2208 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2209 {
2210         struct cifsFileInfo *cfile;
2211         int rc;
2212
2213         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2214         if (rc)
2215                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2216
2217         return cfile;
2218 }
2219
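/*
 * Like cifs_get_writable_file(), but looks the file up by path: walk the
 * tcon's open file list, match on the full path built from each dentry,
 * and return a writable handle on the matching inode (-ENOENT if no open
 * file matches).
 */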
2220 int
2221 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2222                        int flags,
2223                        struct cifsFileInfo **ret_file)
2224 {
2225         struct cifsFileInfo *cfile;
2226         void *page = alloc_dentry_path();
2227
2228         *ret_file = NULL;
2229
2230         spin_lock(&tcon->open_file_lock);
2231         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2232                 struct cifsInodeInfo *cinode;
2233                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2234                 if (IS_ERR(full_path)) {
2235                         spin_unlock(&tcon->open_file_lock);
2236                         free_dentry_path(page);
2237                         return PTR_ERR(full_path);
2238                 }
2239                 if (strcmp(full_path, name))
2240                         continue;
2241
2242                 cinode = CIFS_I(d_inode(cfile->dentry));
2243                 spin_unlock(&tcon->open_file_lock);
2244                 free_dentry_path(page);
2245                 return cifs_get_writable_file(cinode, flags, ret_file);
2246         }
2247
2248         spin_unlock(&tcon->open_file_lock);
2249         free_dentry_path(page);
2250         return -ENOENT;
2251 }
2252
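/*
 * Path-based counterpart of find_readable_file(): match an open file on
 * this tcon by full path and return a readable handle on its inode.
 */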
2253 int
2254 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2255                        struct cifsFileInfo **ret_file)
2256 {
2257         struct cifsFileInfo *cfile;
2258         void *page = alloc_dentry_path();
2259
2260         *ret_file = NULL;
2261
2262         spin_lock(&tcon->open_file_lock);
2263         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2264                 struct cifsInodeInfo *cinode;
2265                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2266                 if (IS_ERR(full_path)) {
2267                         spin_unlock(&tcon->open_file_lock);
2268                         free_dentry_path(page);
2269                         return PTR_ERR(full_path);
2270                 }
2271                 if (strcmp(full_path, name))
2272                         continue;
2273
2274                 cinode = CIFS_I(d_inode(cfile->dentry));
2275                 spin_unlock(&tcon->open_file_lock);
2276                 free_dentry_path(page);
2277                 *ret_file = find_readable_file(cinode, 0);
2278                 return *ret_file ? 0 : -ENOENT;
2279         }
2280
2281         spin_unlock(&tcon->open_file_lock);
2282         free_dentry_path(page);
2283         return -ENOENT;
2284 }
2285
2286 void
2287 cifs_writedata_release(struct kref *refcount)
2288 {
2289         struct cifs_writedata *wdata = container_of(refcount,
2290                                         struct cifs_writedata, refcount);
2291 #ifdef CONFIG_CIFS_SMB_DIRECT
2292         if (wdata->mr) {
2293                 smbd_deregister_mr(wdata->mr);
2294                 wdata->mr = NULL;
2295         }
2296 #endif
2297
2298         if (wdata->cfile)
2299                 cifsFileInfo_put(wdata->cfile);
2300
2301         kvfree(wdata->pages);
2302         kfree(wdata);
2303 }
2304
2305 /*
2306  * Write failed with a retryable error. Resend the write request. It's also
2307  * possible that the page was redirtied so re-clean the page.
2308  */
2309 static void
2310 cifs_writev_requeue(struct cifs_writedata *wdata)
2311 {
2312         int i, rc = 0;
2313         struct inode *inode = d_inode(wdata->cfile->dentry);
2314         struct TCP_Server_Info *server;
2315         unsigned int rest_len;
2316
2317         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2318         i = 0;
2319         rest_len = wdata->bytes;
2320         do {
2321                 struct cifs_writedata *wdata2;
2322                 unsigned int j, nr_pages, wsize, tailsz, cur_len;
2323
2324                 wsize = server->ops->wp_retry_size(inode);
2325                 if (wsize < rest_len) {
2326                         nr_pages = wsize / PAGE_SIZE;
2327                         if (!nr_pages) {
2328                                 rc = -EOPNOTSUPP;
2329                                 break;
2330                         }
2331                         cur_len = nr_pages * PAGE_SIZE;
2332                         tailsz = PAGE_SIZE;
2333                 } else {
2334                         nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE);
2335                         cur_len = rest_len;
2336                         tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE;
2337                 }
2338
2339                 wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
2340                 if (!wdata2) {
2341                         rc = -ENOMEM;
2342                         break;
2343                 }
2344
2345                 for (j = 0; j < nr_pages; j++) {
2346                         wdata2->pages[j] = wdata->pages[i + j];
2347                         lock_page(wdata2->pages[j]);
2348                         clear_page_dirty_for_io(wdata2->pages[j]);
2349                 }
2350
2351                 wdata2->sync_mode = wdata->sync_mode;
2352                 wdata2->nr_pages = nr_pages;
2353                 wdata2->offset = page_offset(wdata2->pages[0]);
2354                 wdata2->pagesz = PAGE_SIZE;
2355                 wdata2->tailsz = tailsz;
2356                 wdata2->bytes = cur_len;
2357
2358                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2359                                             &wdata2->cfile);
2360                 if (!wdata2->cfile) {
2361                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2362                                  rc);
2363                         if (!is_retryable_error(rc))
2364                                 rc = -EBADF;
2365                 } else {
2366                         wdata2->pid = wdata2->cfile->pid;
2367                         rc = server->ops->async_writev(wdata2,
2368                                                        cifs_writedata_release);
2369                 }
2370
2371                 for (j = 0; j < nr_pages; j++) {
2372                         unlock_page(wdata2->pages[j]);
2373                         if (rc != 0 && !is_retryable_error(rc)) {
2374                                 SetPageError(wdata2->pages[j]);
2375                                 end_page_writeback(wdata2->pages[j]);
2376                                 put_page(wdata2->pages[j]);
2377                         }
2378                 }
2379
2380                 kref_put(&wdata2->refcount, cifs_writedata_release);
2381                 if (rc) {
2382                         if (is_retryable_error(rc))
2383                                 continue;
2384                         i += nr_pages;
2385                         break;
2386                 }
2387
2388                 rest_len -= cur_len;
2389                 i += nr_pages;
2390         } while (i < wdata->nr_pages);
2391
2392         /* cleanup remaining pages from the original wdata */
2393         for (; i < wdata->nr_pages; i++) {
2394                 SetPageError(wdata->pages[i]);
2395                 end_page_writeback(wdata->pages[i]);
2396                 put_page(wdata->pages[i]);
2397         }
2398
2399         if (rc != 0 && !is_retryable_error(rc))
2400                 mapping_set_error(inode->i_mapping, rc);
2401         kref_put(&wdata->refcount, cifs_writedata_release);
2402 }
2403
2404 void
2405 cifs_writev_complete(struct work_struct *work)
2406 {
2407         struct cifs_writedata *wdata = container_of(work,
2408                                                 struct cifs_writedata, work);
2409         struct inode *inode = d_inode(wdata->cfile->dentry);
2410         int i = 0;
2411
2412         if (wdata->result == 0) {
2413                 spin_lock(&inode->i_lock);
2414                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2415                 spin_unlock(&inode->i_lock);
2416                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2417                                          wdata->bytes);
2418         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2419                 return cifs_writev_requeue(wdata);
2420
2421         for (i = 0; i < wdata->nr_pages; i++) {
2422                 struct page *page = wdata->pages[i];
2423
2424                 if (wdata->result == -EAGAIN)
2425                         __set_page_dirty_nobuffers(page);
2426                 else if (wdata->result < 0)
2427                         SetPageError(page);
2428                 end_page_writeback(page);
2429                 cifs_readpage_to_fscache(inode, page);
2430                 put_page(page);
2431         }
2432         if (wdata->result != -EAGAIN)
2433                 mapping_set_error(inode->i_mapping, wdata->result);
2434         kref_put(&wdata->refcount, cifs_writedata_release);
2435 }
2436
2437 struct cifs_writedata *
2438 cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
2439 {
2440         struct cifs_writedata *writedata = NULL;
2441         struct page **pages =
2442                 kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
2443         if (pages) {
2444                 writedata = cifs_writedata_direct_alloc(pages, complete);
2445                 if (!writedata)
2446                         kvfree(pages);
2447         }
2448
2449         return writedata;
2450 }
2451
2452 struct cifs_writedata *
2453 cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
2454 {
2455         struct cifs_writedata *wdata;
2456
2457         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2458         if (wdata != NULL) {
2459                 wdata->pages = pages;
2460                 kref_init(&wdata->refcount);
2461                 INIT_LIST_HEAD(&wdata->list);
2462                 init_completion(&wdata->done);
2463                 INIT_WORK(&wdata->work, complete);
2464         }
2465         return wdata;
2466 }
2467
2468
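/*
 * Write the byte range [from, to) of a page-cache page back to the server
 * through any writable handle on the inode, trimming the range so that the
 * write never extends the file.
 */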
2469 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2470 {
2471         struct address_space *mapping = page->mapping;
2472         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2473         char *write_data;
2474         int rc = -EFAULT;
2475         int bytes_written = 0;
2476         struct inode *inode;
2477         struct cifsFileInfo *open_file;
2478
2479         if (!mapping || !mapping->host)
2480                 return -EFAULT;
2481
2482         inode = page->mapping->host;
2483
2484         offset += (loff_t)from;
2485         write_data = kmap(page);
2486         write_data += from;
2487
2488         if ((to > PAGE_SIZE) || (from > to)) {
2489                 kunmap(page);
2490                 return -EIO;
2491         }
2492
2493         /* racing with truncate? */
2494         if (offset > mapping->host->i_size) {
2495                 kunmap(page);
2496                 return 0; /* don't care */
2497         }
2498
2499         /* check to make sure that we are not extending the file */
2500         if (mapping->host->i_size - offset < (loff_t)to)
2501                 to = (unsigned)(mapping->host->i_size - offset);
2502
2503         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2504                                     &open_file);
2505         if (!rc) {
2506                 bytes_written = cifs_write(open_file, open_file->pid,
2507                                            write_data, to - from, &offset);
2508                 cifsFileInfo_put(open_file);
2509                 /* Does mm or vfs already set times? */
2510                 inode->i_atime = inode->i_mtime = current_time(inode);
2511                 if ((bytes_written > 0) && (offset))
2512                         rc = 0;
2513                 else if (bytes_written < 0)
2514                         rc = bytes_written;
2515                 else
2516                         rc = -EFAULT;
2517         } else {
2518                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2519                 if (!is_retryable_error(rc))
2520                         rc = -EIO;
2521         }
2522
2523         kunmap(page);
2524         return rc;
2525 }
2526
2527 static struct cifs_writedata *
2528 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2529                           pgoff_t end, pgoff_t *index,
2530                           unsigned int *found_pages)
2531 {
2532         struct cifs_writedata *wdata;
2533
2534         wdata = cifs_writedata_alloc((unsigned int)tofind,
2535                                      cifs_writev_complete);
2536         if (!wdata)
2537                 return NULL;
2538
2539         *found_pages = find_get_pages_range_tag(mapping, index, end,
2540                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2541         return wdata;
2542 }
2543
2544 static unsigned int
2545 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2546                     struct address_space *mapping,
2547                     struct writeback_control *wbc,
2548                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2549 {
2550         unsigned int nr_pages = 0, i;
2551         struct page *page;
2552
2553         for (i = 0; i < found_pages; i++) {
2554                 page = wdata->pages[i];
2555                 /*
2556                  * At this point we hold neither the i_pages lock nor the
2557                  * page lock: the page may be truncated or invalidated
2558                  * (changing page->mapping to NULL), or even swizzled
2559                  * back from swapper_space to tmpfs file mapping
2560                  */
2561
2562                 if (nr_pages == 0)
2563                         lock_page(page);
2564                 else if (!trylock_page(page))
2565                         break;
2566
2567                 if (unlikely(page->mapping != mapping)) {
2568                         unlock_page(page);
2569                         break;
2570                 }
2571
2572                 if (!wbc->range_cyclic && page->index > end) {
2573                         *done = true;
2574                         unlock_page(page);
2575                         break;
2576                 }
2577
2578                 if (*next && (page->index != *next)) {
2579                         /* Not next consecutive page */
2580                         unlock_page(page);
2581                         break;
2582                 }
2583
2584                 if (wbc->sync_mode != WB_SYNC_NONE)
2585                         wait_on_page_writeback(page);
2586
2587                 if (PageWriteback(page) ||
2588                                 !clear_page_dirty_for_io(page)) {
2589                         unlock_page(page);
2590                         break;
2591                 }
2592
2593                 /*
2594                  * This actually clears the dirty bit in the radix tree.
2595                  * See cifs_writepage() for more commentary.
2596                  */
2597                 set_page_writeback(page);
2598                 if (page_offset(page) >= i_size_read(mapping->host)) {
2599                         *done = true;
2600                         unlock_page(page);
2601                         end_page_writeback(page);
2602                         break;
2603                 }
2604
2605                 wdata->pages[i] = page;
2606                 *next = page->index + 1;
2607                 ++nr_pages;
2608         }
2609
2610         /* reset index to refind any pages skipped */
2611         if (nr_pages == 0)
2612                 *index = wdata->pages[0]->index + 1;
2613
2614         /* put any pages we aren't going to use */
2615         for (i = nr_pages; i < found_pages; i++) {
2616                 put_page(wdata->pages[i]);
2617                 wdata->pages[i] = NULL;
2618         }
2619
2620         return nr_pages;
2621 }
2622
2623 static int
2624 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2625                  struct address_space *mapping, struct writeback_control *wbc)
2626 {
2627         int rc;
2628
2629         wdata->sync_mode = wbc->sync_mode;
2630         wdata->nr_pages = nr_pages;
2631         wdata->offset = page_offset(wdata->pages[0]);
2632         wdata->pagesz = PAGE_SIZE;
2633         wdata->tailsz = min(i_size_read(mapping->host) -
2634                         page_offset(wdata->pages[nr_pages - 1]),
2635                         (loff_t)PAGE_SIZE);
2636         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2637         wdata->pid = wdata->cfile->pid;
2638
2639         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2640         if (rc)
2641                 return rc;
2642
2643         if (wdata->cfile->invalidHandle)
2644                 rc = -EAGAIN;
2645         else
2646                 rc = wdata->server->ops->async_writev(wdata,
2647                                                       cifs_writedata_release);
2648
2649         return rc;
2650 }
2651
2652 static int
2653 cifs_writepage_locked(struct page *page, struct writeback_control *wbc);
2654
2655 static int cifs_write_one_page(struct page *page, struct writeback_control *wbc,
2656                 void *data)
2657 {
2658         struct address_space *mapping = data;
2659         int ret;
2660
2661         ret = cifs_writepage_locked(page, wbc);
2662         unlock_page(page);
2663         mapping_set_error(mapping, ret);
2664         return ret;
2665 }
2666
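/*
 * Writeback entry point: repeatedly gather up to wsize worth of contiguous
 * dirty pages into a cifs_writedata, reserve credits, and submit them as a
 * single async write.  Under WB_SYNC_ALL a batch that fails with -EAGAIN is
 * rescanned from its starting index.
 */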
2667 static int cifs_writepages(struct address_space *mapping,
2668                            struct writeback_control *wbc)
2669 {
2670         struct inode *inode = mapping->host;
2671         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2672         struct TCP_Server_Info *server;
2673         bool done = false, scanned = false, range_whole = false;
2674         pgoff_t end, index;
2675         struct cifs_writedata *wdata;
2676         struct cifsFileInfo *cfile = NULL;
2677         int rc = 0;
2678         int saved_rc = 0;
2679         unsigned int xid;
2680
2681         /*
2682          * If wsize is smaller than the page cache size, default to writing
2683          * one page at a time.
2684          */
2685         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2686                 return write_cache_pages(mapping, wbc, cifs_write_one_page,
2687                                 mapping);
2688
2689         xid = get_xid();
2690         if (wbc->range_cyclic) {
2691                 index = mapping->writeback_index; /* Start from prev offset */
2692                 end = -1;
2693         } else {
2694                 index = wbc->range_start >> PAGE_SHIFT;
2695                 end = wbc->range_end >> PAGE_SHIFT;
2696                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2697                         range_whole = true;
2698                 scanned = true;
2699         }
2700         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2701
2702 retry:
2703         while (!done && index <= end) {
2704                 unsigned int i, nr_pages, found_pages, wsize;
2705                 pgoff_t next = 0, tofind, saved_index = index;
2706                 struct cifs_credits credits_on_stack;
2707                 struct cifs_credits *credits = &credits_on_stack;
2708                 int get_file_rc = 0;
2709
2710                 if (cfile)
2711                         cifsFileInfo_put(cfile);
2712
2713                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2714
2715                 /* in case of an error store it to return later */
2716                 if (rc)
2717                         get_file_rc = rc;
2718
2719                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2720                                                    &wsize, credits);
2721                 if (rc != 0) {
2722                         done = true;
2723                         break;
2724                 }
2725
2726                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2727
2728                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2729                                                   &found_pages);
2730                 if (!wdata) {
2731                         rc = -ENOMEM;
2732                         done = true;
2733                         add_credits_and_wake_if(server, credits, 0);
2734                         break;
2735                 }
2736
2737                 if (found_pages == 0) {
2738                         kref_put(&wdata->refcount, cifs_writedata_release);
2739                         add_credits_and_wake_if(server, credits, 0);
2740                         break;
2741                 }
2742
2743                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2744                                                end, &index, &next, &done);
2745
2746                 /* nothing to write? */
2747                 if (nr_pages == 0) {
2748                         kref_put(&wdata->refcount, cifs_writedata_release);
2749                         add_credits_and_wake_if(server, credits, 0);
2750                         continue;
2751                 }
2752
2753                 wdata->credits = credits_on_stack;
2754                 wdata->cfile = cfile;
2755                 wdata->server = server;
2756                 cfile = NULL;
2757
2758                 if (!wdata->cfile) {
2759                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2760                                  get_file_rc);
2761                         if (is_retryable_error(get_file_rc))
2762                                 rc = get_file_rc;
2763                         else
2764                                 rc = -EBADF;
2765                 } else
2766                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2767
2768                 for (i = 0; i < nr_pages; ++i)
2769                         unlock_page(wdata->pages[i]);
2770
2771                 /* send failure -- clean up the mess */
2772                 if (rc != 0) {
2773                         add_credits_and_wake_if(server, &wdata->credits, 0);
2774                         for (i = 0; i < nr_pages; ++i) {
2775                                 if (is_retryable_error(rc))
2776                                         redirty_page_for_writepage(wbc,
2777                                                            wdata->pages[i]);
2778                                 else
2779                                         SetPageError(wdata->pages[i]);
2780                                 end_page_writeback(wdata->pages[i]);
2781                                 put_page(wdata->pages[i]);
2782                         }
2783                         if (!is_retryable_error(rc))
2784                                 mapping_set_error(mapping, rc);
2785                 }
2786                 kref_put(&wdata->refcount, cifs_writedata_release);
2787
2788                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2789                         index = saved_index;
2790                         continue;
2791                 }
2792
2793                 /* Return immediately if we received a signal during writing */
2794                 if (is_interrupt_error(rc)) {
2795                         done = true;
2796                         break;
2797                 }
2798
2799                 if (rc != 0 && saved_rc == 0)
2800                         saved_rc = rc;
2801
2802                 wbc->nr_to_write -= nr_pages;
2803                 if (wbc->nr_to_write <= 0)
2804                         done = true;
2805
2806                 index = next;
2807         }
2808
2809         if (!scanned && !done) {
2810                 /*
2811                  * We hit the last page and there is more work to be done: wrap
2812                  * back to the start of the file
2813                  */
2814                 scanned = true;
2815                 index = 0;
2816                 goto retry;
2817         }
2818
2819         if (saved_rc != 0)
2820                 rc = saved_rc;
2821
2822         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2823                 mapping->writeback_index = index;
2824
2825         if (cfile)
2826                 cifsFileInfo_put(cfile);
2827         free_xid(xid);
2828         /* Indication to update ctime and mtime as close is deferred */
2829         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2830         return rc;
2831 }
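/*
 * Illustrative sketch only, compiled out: the cyclic scan in
 * cifs_writepages() above follows the generic writeback shape below.
 * The helper name and the "write one batch" step are hypothetical;
 * only the control flow mirrors the function above.
 */
#if 0
static void cyclic_scan_sketch(struct address_space *mapping,
                               struct writeback_control *wbc)
{
        pgoff_t index, end;
        bool scanned = false, done = false;

        if (wbc->range_cyclic) {
                index = mapping->writeback_index; /* resume from last stop */
                end = -1;
        } else {
                index = wbc->range_start >> PAGE_SHIFT;
                end = wbc->range_end >> PAGE_SHIFT;
                scanned = true;                   /* explicit range: no wrap */
        }
retry:
        while (!done && index <= end) {
                /* hypothetical step: write one batch of pages at index */
                index++;
                if (--wbc->nr_to_write <= 0)
                        done = true;
        }
        if (!scanned && !done) {
                /* hit the last page with work left: wrap once to the start */
                scanned = true;
                index = 0;
                goto retry;
        }
        mapping->writeback_index = index;         /* save the resume point */
}
#endif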
2832
2833 static int
2834 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2835 {
2836         int rc;
2837         unsigned int xid;
2838
2839         xid = get_xid();
2840 /* BB add check for wbc flags */
2841         get_page(page);
2842         if (!PageUptodate(page))
2843                 cifs_dbg(FYI, "ppw - page not up to date\n");
2844
2845         /*
2846          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2847          *
2848          * A writepage() implementation always needs to do either this,
2849          * or re-dirty the page with "redirty_page_for_writepage()" in
2850          * the case of a failure.
2851          *
2852          * Just unlocking the page will cause the radix tree tag-bits
2853          * to fail to update with the state of the page correctly.
2854          */
2855         set_page_writeback(page);
2856 retry_write:
2857         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2858         if (is_retryable_error(rc)) {
2859                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2860                         goto retry_write;
2861                 redirty_page_for_writepage(wbc, page);
2862         } else if (rc != 0) {
2863                 SetPageError(page);
2864                 mapping_set_error(page->mapping, rc);
2865         } else {
2866                 SetPageUptodate(page);
2867         }
2868         end_page_writeback(page);
2869         put_page(page);
2870         free_xid(xid);
2871         return rc;
2872 }
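/*
 * Illustrative sketch only, compiled out: the writepage contract that the
 * comment in cifs_writepage_locked() above describes. The send step is a
 * hypothetical placeholder; the flag handling is the required pattern.
 */
#if 0
static int writepage_contract_sketch(struct page *page,
                                     struct writeback_control *wbc)
{
        int rc;

        set_page_writeback(page);       /* claim the page for writeback */
        rc = 0;                         /* hypothetical: send page to server */
        if (is_retryable_error(rc))
                redirty_page_for_writepage(wbc, page); /* try again later */
        else if (rc)
                mapping_set_error(page->mapping, rc);  /* hard failure */
        else
                SetPageUptodate(page);
        end_page_writeback(page);       /* either way, writeback is over */
        return rc;
}
#endif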
2873
2874 static int cifs_write_end(struct file *file, struct address_space *mapping,
2875                         loff_t pos, unsigned len, unsigned copied,
2876                         struct page *page, void *fsdata)
2877 {
2878         int rc;
2879         struct inode *inode = mapping->host;
2880         struct cifsFileInfo *cfile = file->private_data;
2881         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2882         __u32 pid;
2883
2884         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2885                 pid = cfile->pid;
2886         else
2887                 pid = current->tgid;
2888
2889         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2890                  page, pos, copied);
2891
2892         if (PageChecked(page)) {
2893                 if (copied == len)
2894                         SetPageUptodate(page);
2895                 ClearPageChecked(page);
2896         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2897                 SetPageUptodate(page);
2898
2899         if (!PageUptodate(page)) {
2900                 char *page_data;
2901                 unsigned offset = pos & (PAGE_SIZE - 1);
2902                 unsigned int xid;
2903
2904                 xid = get_xid();
2905                 /* this is probably better than calling partialpage_write
2906                    directly, since the file handle is known here and we
2907                    might as well leverage it */
2908                 /* BB check if anything else missing out of ppw
2909                    such as updating last write time */
2910                 page_data = kmap(page);
2911                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2912                 /* if (rc < 0) should we set writebehind rc? */
2913                 kunmap(page);
2914
2915                 free_xid(xid);
2916         } else {
2917                 rc = copied;
2918                 pos += copied;
2919                 set_page_dirty(page);
2920         }
2921
2922         if (rc > 0) {
2923                 spin_lock(&inode->i_lock);
2924                 if (pos > inode->i_size) {
2925                         i_size_write(inode, pos);
2926                         inode->i_blocks = (512 - 1 + pos) >> 9;
2927                 }
2928                 spin_unlock(&inode->i_lock);
2929         }
2930
2931         unlock_page(page);
2932         put_page(page);
2933         /* Indication to update ctime and mtime as close is deferred */
2934         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2935
2936         return rc;
2937 }
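/*
 * Worked example for the i_blocks update in cifs_write_end() above:
 * i_blocks counts 512-byte sectors, and (512 - 1 + pos) >> 9 rounds pos
 * up to the next sector. E.g. pos = 1000 gives (511 + 1000) >> 9 =
 * 1511 >> 9 = 2 sectors, matching ceil(1000 / 512).
 */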
2938
2939 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2940                       int datasync)
2941 {
2942         unsigned int xid;
2943         int rc = 0;
2944         struct cifs_tcon *tcon;
2945         struct TCP_Server_Info *server;
2946         struct cifsFileInfo *smbfile = file->private_data;
2947         struct inode *inode = file_inode(file);
2948         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2949
2950         rc = file_write_and_wait_range(file, start, end);
2951         if (rc) {
2952                 trace_cifs_fsync_err(inode->i_ino, rc);
2953                 return rc;
2954         }
2955
2956         xid = get_xid();
2957
2958         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2959                  file, datasync);
2960
2961         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2962                 rc = cifs_zap_mapping(inode);
2963                 if (rc) {
2964                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2965                         rc = 0; /* don't care about it in fsync */
2966                 }
2967         }
2968
2969         tcon = tlink_tcon(smbfile->tlink);
2970         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2971                 server = tcon->ses->server;
2972                 if (server->ops->flush == NULL) {
2973                         rc = -ENOSYS;
2974                         goto strict_fsync_exit;
2975                 }
2976
2977                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2978                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2979                         if (smbfile) {
2980                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2981                                 cifsFileInfo_put(smbfile);
2982                         } else
2983                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2984                 } else
2985                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2986         }
2987
2988 strict_fsync_exit:
2989         free_xid(xid);
2990         return rc;
2991 }
2992
2993 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2994 {
2995         unsigned int xid;
2996         int rc = 0;
2997         struct cifs_tcon *tcon;
2998         struct TCP_Server_Info *server;
2999         struct cifsFileInfo *smbfile = file->private_data;
3000         struct inode *inode = file_inode(file);
3001         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3002
3003         rc = file_write_and_wait_range(file, start, end);
3004         if (rc) {
3005                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3006                 return rc;
3007         }
3008
3009         xid = get_xid();
3010
3011         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3012                  file, datasync);
3013
3014         tcon = tlink_tcon(smbfile->tlink);
3015         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3016                 server = tcon->ses->server;
3017                 if (server->ops->flush == NULL) {
3018                         rc = -ENOSYS;
3019                         goto fsync_exit;
3020                 }
3021
3022                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3023                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3024                         if (smbfile) {
3025                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3026                                 cifsFileInfo_put(smbfile);
3027                         } else
3028                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3029                 } else
3030                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3031         }
3032
3033 fsync_exit:
3034         free_xid(xid);
3035         return rc;
3036 }
3037
3038 /*
3039  * As file closes, flush all cached write data for this inode checking
3040  * for write behind errors.
3041  */
3042 int cifs_flush(struct file *file, fl_owner_t id)
3043 {
3044         struct inode *inode = file_inode(file);
3045         int rc = 0;
3046
3047         if (file->f_mode & FMODE_WRITE)
3048                 rc = filemap_write_and_wait(inode->i_mapping);
3049
3050         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3051         if (rc) {
3052                 /* get more nuanced writeback errors */
3053                 rc = filemap_check_wb_err(file->f_mapping, 0);
3054                 trace_cifs_flush_err(inode->i_ino, rc);
3055         }
3056         return rc;
3057 }
3058
3059 static int
3060 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
3061 {
3062         int rc = 0;
3063         unsigned long i;
3064
3065         for (i = 0; i < num_pages; i++) {
3066                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3067                 if (!pages[i]) {
3068                         /*
3069                          * save number of pages we have already allocated and
3070                          * return with ENOMEM error
3071                          */
3072                         num_pages = i;
3073                         rc = -ENOMEM;
3074                         break;
3075                 }
3076         }
3077
3078         if (rc) {
3079                 for (i = 0; i < num_pages; i++)
3080                         put_page(pages[i]);
3081         }
3082         return rc;
3083 }
3084
3085 static inline
3086 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
3087 {
3088         size_t num_pages;
3089         size_t clen;
3090
3091         clen = min_t(const size_t, len, wsize);
3092         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
3093
3094         if (cur_len)
3095                 *cur_len = clen;
3096
3097         return num_pages;
3098 }
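/*
 * Worked example for get_numpages(): with wsize = 65536, len = 100000 and
 * PAGE_SIZE = 4096, clen = min(100000, 65536) = 65536 and num_pages =
 * DIV_ROUND_UP(65536, 4096) = 16. With len = 5000 instead, clen = 5000 and
 * num_pages = 2, with the tail page only partially used.
 */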
3099
3100 static void
3101 cifs_uncached_writedata_release(struct kref *refcount)
3102 {
3103         int i;
3104         struct cifs_writedata *wdata = container_of(refcount,
3105                                         struct cifs_writedata, refcount);
3106
3107         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3108         for (i = 0; i < wdata->nr_pages; i++)
3109                 put_page(wdata->pages[i]);
3110         cifs_writedata_release(refcount);
3111 }
3112
3113 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3114
3115 static void
3116 cifs_uncached_writev_complete(struct work_struct *work)
3117 {
3118         struct cifs_writedata *wdata = container_of(work,
3119                                         struct cifs_writedata, work);
3120         struct inode *inode = d_inode(wdata->cfile->dentry);
3121         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3122
3123         spin_lock(&inode->i_lock);
3124         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3125         if (cifsi->server_eof > inode->i_size)
3126                 i_size_write(inode, cifsi->server_eof);
3127         spin_unlock(&inode->i_lock);
3128
3129         complete(&wdata->done);
3130         collect_uncached_write_data(wdata->ctx);
3131         /* the call below can possibly free the last ref to the aio ctx */
3132         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3133 }
3134
3135 static int
3136 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
3137                       size_t *len, unsigned long *num_pages)
3138 {
3139         size_t save_len, copied, bytes, cur_len = *len;
3140         unsigned long i, nr_pages = *num_pages;
3141
3142         save_len = cur_len;
3143         for (i = 0; i < nr_pages; i++) {
3144                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
3145                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
3146                 cur_len -= copied;
3147                 /*
3148                  * If we didn't copy as much as we expected, then that
3149                  * may mean we trod into an unmapped area. Stop copying
3150                  * at that point. On the next pass through the big
3151                  * loop, we'll likely end up getting a zero-length
3152                  * write and bailing out of it.
3153                  */
3154                 if (copied < bytes)
3155                         break;
3156         }
3157         cur_len = save_len - cur_len;
3158         *len = cur_len;
3159
3160         /*
3161          * If we have no data to send, then that probably means that
3162          * the copy above failed altogether. That's most likely because
3163          * the address in the iovec was bogus. Return -EFAULT and let
3164          * the caller free anything we allocated and bail out.
3165          */
3166         if (!cur_len)
3167                 return -EFAULT;
3168
3169         /*
3170          * i + 1 now represents the number of pages we actually used in
3171          * the copy phase above.
3172          */
3173         *num_pages = i + 1;
3174         return 0;
3175 }
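/*
 * Illustrative sketch only, compiled out: the short-copy behaviour that
 * wdata_fill_from_iovec() above relies on. copy_page_from_iter() can
 * return fewer bytes than requested when the source iterator runs into an
 * unmapped user address; the helper name here is hypothetical.
 */
#if 0
static size_t short_copy_sketch(struct page *page, struct iov_iter *from)
{
        size_t want = min_t(size_t, iov_iter_count(from), PAGE_SIZE);
        size_t copied = copy_page_from_iter(page, 0, want, from);

        /*
         * copied < want means we likely trod into an unmapped area; the
         * caller stops copying and bails out on a zero-length result.
         */
        return copied;
}
#endif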
3176
3177 static int
3178 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3179         struct cifs_aio_ctx *ctx)
3180 {
3181         unsigned int wsize;
3182         struct cifs_credits credits;
3183         int rc;
3184         struct TCP_Server_Info *server = wdata->server;
3185
3186         do {
3187                 if (wdata->cfile->invalidHandle) {
3188                         rc = cifs_reopen_file(wdata->cfile, false);
3189                         if (rc == -EAGAIN)
3190                                 continue;
3191                         else if (rc)
3192                                 break;
3193                 }
3194
3195
3196                 /*
3197                  * Wait for credits to resend this wdata.
3198                  * Note: we attempt to resend the whole wdata in one
3199                  * request, not in segments
3200                  */
3201                 do {
3202                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3203                                                 &wsize, &credits);
3204                         if (rc)
3205                                 goto fail;
3206
3207                         if (wsize < wdata->bytes) {
3208                                 add_credits_and_wake_if(server, &credits, 0);
3209                                 msleep(1000);
3210                         }
3211                 } while (wsize < wdata->bytes);
3212                 wdata->credits = credits;
3213
3214                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3215
3216                 if (!rc) {
3217                         if (wdata->cfile->invalidHandle)
3218                                 rc = -EAGAIN;
3219                         else {
3220 #ifdef CONFIG_CIFS_SMB_DIRECT
3221                                 if (wdata->mr) {
3222                                         wdata->mr->need_invalidate = true;
3223                                         smbd_deregister_mr(wdata->mr);
3224                                         wdata->mr = NULL;
3225                                 }
3226 #endif
3227                                 rc = server->ops->async_writev(wdata,
3228                                         cifs_uncached_writedata_release);
3229                         }
3230                 }
3231
3232                 /* If the write was successfully sent, we are done */
3233                 if (!rc) {
3234                         list_add_tail(&wdata->list, wdata_list);
3235                         return 0;
3236                 }
3237
3238                 /* Roll back credits and retry if needed */
3239                 add_credits_and_wake_if(server, &wdata->credits, 0);
3240         } while (rc == -EAGAIN);
3241
3242 fail:
3243         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3244         return rc;
3245 }
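/*
 * Illustrative sketch only, compiled out: the credit-wait shape used by
 * cifs_resend_wdata() above. A resend must cover the whole wdata in one
 * request, so undersized grants are returned until the granted size
 * reaches wdata->bytes; the helper name is hypothetical.
 */
#if 0
static int wait_for_full_credits_sketch(struct TCP_Server_Info *server,
                                        struct cifs_writedata *wdata,
                                        struct cifs_credits *credits)
{
        unsigned int wsize;
        int rc;

        do {
                rc = server->ops->wait_mtu_credits(server, wdata->bytes,
                                                   &wsize, credits);
                if (rc)
                        return rc;
                if (wsize < wdata->bytes) {
                        /* grant too small: hand it back and retry later */
                        add_credits_and_wake_if(server, credits, 0);
                        msleep(1000);
                }
        } while (wsize < wdata->bytes);
        return 0;
}
#endif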
3246
3247 static int
3248 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
3249                      struct cifsFileInfo *open_file,
3250                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3251                      struct cifs_aio_ctx *ctx)
3252 {
3253         int rc = 0;
3254         size_t cur_len;
3255         unsigned long nr_pages, num_pages, i;
3256         struct cifs_writedata *wdata;
3257         struct iov_iter saved_from = *from;
3258         loff_t saved_offset = offset;
3259         pid_t pid;
3260         struct TCP_Server_Info *server;
3261         struct page **pagevec;
3262         size_t start;
3263         unsigned int xid;
3264
3265         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3266                 pid = open_file->pid;
3267         else
3268                 pid = current->tgid;
3269
3270         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3271         xid = get_xid();
3272
3273         do {
3274                 unsigned int wsize;
3275                 struct cifs_credits credits_on_stack;
3276                 struct cifs_credits *credits = &credits_on_stack;
3277
3278                 if (open_file->invalidHandle) {
3279                         rc = cifs_reopen_file(open_file, false);
3280                         if (rc == -EAGAIN)
3281                                 continue;
3282                         else if (rc)
3283                                 break;
3284                 }
3285
3286                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3287                                                    &wsize, credits);
3288                 if (rc)
3289                         break;
3290
3291                 cur_len = min_t(const size_t, len, wsize);
3292
3293                 if (ctx->direct_io) {
3294                         ssize_t result;
3295
3296                         result = iov_iter_get_pages_alloc2(
3297                                 from, &pagevec, cur_len, &start);
3298                         if (result < 0) {
3299                                 cifs_dbg(VFS,
3300                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3301                                          result, iov_iter_type(from),
3302                                          from->iov_offset, from->count);
3303                                 dump_stack();
3304
3305                                 rc = result;
3306                                 add_credits_and_wake_if(server, credits, 0);
3307                                 break;
3308                         }
3309                         cur_len = (size_t)result;
3310
3311                         nr_pages =
3312                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3313
3314                         wdata = cifs_writedata_direct_alloc(pagevec,
3315                                              cifs_uncached_writev_complete);
3316                         if (!wdata) {
3317                                 rc = -ENOMEM;
3318                                 for (i = 0; i < nr_pages; i++)
3319                                         put_page(pagevec[i]);
3320                                 kvfree(pagevec);
3321                                 add_credits_and_wake_if(server, credits, 0);
3322                                 break;
3323                         }
3324
3325
3326                         wdata->page_offset = start;
3327                         wdata->tailsz =
3328                                 nr_pages > 1 ?
3329                                         cur_len - (PAGE_SIZE - start) -
3330                                         (nr_pages - 2) * PAGE_SIZE :
3331                                         cur_len;
3332                 } else {
3333                         nr_pages = get_numpages(wsize, len, &cur_len);
3334                         wdata = cifs_writedata_alloc(nr_pages,
3335                                              cifs_uncached_writev_complete);
3336                         if (!wdata) {
3337                                 rc = -ENOMEM;
3338                                 add_credits_and_wake_if(server, credits, 0);
3339                                 break;
3340                         }
3341
3342                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3343                         if (rc) {
3344                                 kvfree(wdata->pages);
3345                                 kfree(wdata);
3346                                 add_credits_and_wake_if(server, credits, 0);
3347                                 break;
3348                         }
3349
3350                         num_pages = nr_pages;
3351                         rc = wdata_fill_from_iovec(
3352                                 wdata, from, &cur_len, &num_pages);
3353                         if (rc) {
3354                                 for (i = 0; i < nr_pages; i++)
3355                                         put_page(wdata->pages[i]);
3356                                 kvfree(wdata->pages);
3357                                 kfree(wdata);
3358                                 add_credits_and_wake_if(server, credits, 0);
3359                                 break;
3360                         }
3361
3362                         /*
3363                          * Bring nr_pages down to the number of pages we
3364                          * actually used, and free any pages that we didn't use.
3365                          */
3366                         for ( ; nr_pages > num_pages; nr_pages--)
3367                                 put_page(wdata->pages[nr_pages - 1]);
3368
3369                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3370                 }
3371
3372                 wdata->sync_mode = WB_SYNC_ALL;
3373                 wdata->nr_pages = nr_pages;
3374                 wdata->offset = (__u64)offset;
3375                 wdata->cfile = cifsFileInfo_get(open_file);
3376                 wdata->server = server;
3377                 wdata->pid = pid;
3378                 wdata->bytes = cur_len;
3379                 wdata->pagesz = PAGE_SIZE;
3380                 wdata->credits = credits_on_stack;
3381                 wdata->ctx = ctx;
3382                 kref_get(&ctx->refcount);
3383
3384                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3385
3386                 if (!rc) {
3387                         if (wdata->cfile->invalidHandle)
3388                                 rc = -EAGAIN;
3389                         else
3390                                 rc = server->ops->async_writev(wdata,
3391                                         cifs_uncached_writedata_release);
3392                 }
3393
3394                 if (rc) {
3395                         add_credits_and_wake_if(server, &wdata->credits, 0);
3396                         kref_put(&wdata->refcount,
3397                                  cifs_uncached_writedata_release);
3398                         if (rc == -EAGAIN) {
3399                                 *from = saved_from;
3400                                 iov_iter_advance(from, offset - saved_offset);
3401                                 continue;
3402                         }
3403                         break;
3404                 }
3405
3406                 list_add_tail(&wdata->list, wdata_list);
3407                 offset += cur_len;
3408                 len -= cur_len;
3409         } while (len > 0);
3410
3411         free_xid(xid);
3412         return rc;
3413 }
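/*
 * Worked example for the direct I/O page math in cifs_write_from_iter()
 * above: with PAGE_SIZE = 4096, start = 512 and cur_len = 8192, nr_pages =
 * (8192 + 512 + 4095) / 4096 = 3. The first page then carries 4096 - 512 =
 * 3584 bytes, the middle page a full 4096, and tailsz = 8192 - 3584 - 4096
 * = 512 bytes.
 */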
3414
3415 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3416 {
3417         struct cifs_writedata *wdata, *tmp;
3418         struct cifs_tcon *tcon;
3419         struct cifs_sb_info *cifs_sb;
3420         struct dentry *dentry = ctx->cfile->dentry;
3421         ssize_t rc;
3422
3423         tcon = tlink_tcon(ctx->cfile->tlink);
3424         cifs_sb = CIFS_SB(dentry->d_sb);
3425
3426         mutex_lock(&ctx->aio_mutex);
3427
3428         if (list_empty(&ctx->list)) {
3429                 mutex_unlock(&ctx->aio_mutex);
3430                 return;
3431         }
3432
3433         rc = ctx->rc;
3434         /*
3435          * Wait for and collect replies for any successful sends in order of
3436          * increasing offset. Once an error is hit, then return without waiting
3437          * for any more replies.
3438          */
3439 restart_loop:
3440         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3441                 if (!rc) {
3442                         if (!try_wait_for_completion(&wdata->done)) {
3443                                 mutex_unlock(&ctx->aio_mutex);
3444                                 return;
3445                         }
3446
3447                         if (wdata->result)
3448                                 rc = wdata->result;
3449                         else
3450                                 ctx->total_len += wdata->bytes;
3451
3452                         /* resend call if it's a retryable error */
3453                         if (rc == -EAGAIN) {
3454                                 struct list_head tmp_list;
3455                                 struct iov_iter tmp_from = ctx->iter;
3456
3457                                 INIT_LIST_HEAD(&tmp_list);
3458                                 list_del_init(&wdata->list);
3459
3460                                 if (ctx->direct_io)
3461                                         rc = cifs_resend_wdata(
3462                                                 wdata, &tmp_list, ctx);
3463                                 else {
3464                                         iov_iter_advance(&tmp_from,
3465                                                  wdata->offset - ctx->pos);
3466
3467                                         rc = cifs_write_from_iter(wdata->offset,
3468                                                 wdata->bytes, &tmp_from,
3469                                                 ctx->cfile, cifs_sb, &tmp_list,
3470                                                 ctx);
3471
3472                                         kref_put(&wdata->refcount,
3473                                                 cifs_uncached_writedata_release);
3474                                 }
3475
3476                                 list_splice(&tmp_list, &ctx->list);
3477                                 goto restart_loop;
3478                         }
3479                 }
3480                 list_del_init(&wdata->list);
3481                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3482         }
3483
3484         cifs_stats_bytes_written(tcon, ctx->total_len);
3485         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3486
3487         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3488
3489         mutex_unlock(&ctx->aio_mutex);
3490
3491         if (ctx->iocb && ctx->iocb->ki_complete)
3492                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3493         else
3494                 complete(&ctx->done);
3495 }
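/*
 * Illustrative sketch only, compiled out: the non-blocking collection step
 * in collect_uncached_write_data() above. try_wait_for_completion() lets
 * whichever completion runs last resume the collection instead of blocking
 * here; error and resend handling from the real loop are elided.
 */
#if 0
static bool collect_step_sketch(struct cifs_aio_ctx *ctx)
{
        struct cifs_writedata *wdata, *tmp;

        list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
                if (!try_wait_for_completion(&wdata->done))
                        return false;   /* a later completion resumes this */
                ctx->total_len += wdata->bytes;
                list_del_init(&wdata->list);
                kref_put(&wdata->refcount, cifs_uncached_writedata_release);
        }
        return true;
}
#endif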
3496
3497 static ssize_t __cifs_writev(
3498         struct kiocb *iocb, struct iov_iter *from, bool direct)
3499 {
3500         struct file *file = iocb->ki_filp;
3501         ssize_t total_written = 0;
3502         struct cifsFileInfo *cfile;
3503         struct cifs_tcon *tcon;
3504         struct cifs_sb_info *cifs_sb;
3505         struct cifs_aio_ctx *ctx;
3506         struct iov_iter saved_from = *from;
3507         size_t len = iov_iter_count(from);
3508         int rc;
3509
3510         /*
3511          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3512          * In this case, fall back to non-direct write function.
3513          * this could be improved by getting pages directly in ITER_KVEC
3514          */
3515         if (direct && iov_iter_is_kvec(from)) {
3516                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3517                 direct = false;
3518         }
3519
3520         rc = generic_write_checks(iocb, from);
3521         if (rc <= 0)
3522                 return rc;
3523
3524         cifs_sb = CIFS_FILE_SB(file);
3525         cfile = file->private_data;
3526         tcon = tlink_tcon(cfile->tlink);
3527
3528         if (!tcon->ses->server->ops->async_writev)
3529                 return -ENOSYS;
3530
3531         ctx = cifs_aio_ctx_alloc();
3532         if (!ctx)
3533                 return -ENOMEM;
3534
3535         ctx->cfile = cifsFileInfo_get(cfile);
3536
3537         if (!is_sync_kiocb(iocb))
3538                 ctx->iocb = iocb;
3539
3540         ctx->pos = iocb->ki_pos;
3541
3542         if (direct) {
3543                 ctx->direct_io = true;
3544                 ctx->iter = *from;
3545                 ctx->len = len;
3546         } else {
3547                 rc = setup_aio_ctx_iter(ctx, from, ITER_SOURCE);
3548                 if (rc) {
3549                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3550                         return rc;
3551                 }
3552         }
3553
3554         /* grab a lock here because read response handlers can access ctx */
3555         mutex_lock(&ctx->aio_mutex);
3556
3557         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3558                                   cfile, cifs_sb, &ctx->list, ctx);
3559
3560         /*
3561          * If at least one write was successfully sent, then discard any rc
3562          * value from the later writes. If the later writes succeed, then
3563          * we'll end up returning whatever was written. If one fails, then
3564          * we'll get a new rc value from that.
3565          */
3566         if (!list_empty(&ctx->list))
3567                 rc = 0;
3568
3569         mutex_unlock(&ctx->aio_mutex);
3570
3571         if (rc) {
3572                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3573                 return rc;
3574         }
3575
3576         if (!is_sync_kiocb(iocb)) {
3577                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3578                 return -EIOCBQUEUED;
3579         }
3580
3581         rc = wait_for_completion_killable(&ctx->done);
3582         if (rc) {
3583                 mutex_lock(&ctx->aio_mutex);
3584                 ctx->rc = rc = -EINTR;
3585                 total_written = ctx->total_len;
3586                 mutex_unlock(&ctx->aio_mutex);
3587         } else {
3588                 rc = ctx->rc;
3589                 total_written = ctx->total_len;
3590         }
3591
3592         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3593
3594         if (unlikely(!total_written))
3595                 return rc;
3596
3597         iocb->ki_pos += total_written;
3598         return total_written;
3599 }
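/*
 * Illustrative sketch only, compiled out: the kiocb completion pattern
 * used by __cifs_writev() above. Async callers get -EIOCBQUEUED at once
 * and are completed later via ki_complete; sync callers wait on ctx->done.
 * This reduced form omits the partial-write accounting the real code does.
 */
#if 0
static ssize_t kiocb_wait_sketch(struct kiocb *iocb, struct cifs_aio_ctx *ctx)
{
        if (!is_sync_kiocb(iocb))
                return -EIOCBQUEUED;    /* ki_complete fires when done */

        if (wait_for_completion_killable(&ctx->done))
                return -EINTR;          /* killed while waiting */
        return ctx->rc;                 /* bytes written, or an error */
}
#endif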
3600
3601 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3602 {
3603         struct file *file = iocb->ki_filp;
3604
3605         cifs_revalidate_mapping(file->f_inode);
3606         return __cifs_writev(iocb, from, true);
3607 }
3608
3609 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3610 {
3611         return __cifs_writev(iocb, from, false);
3612 }
3613
3614 static ssize_t
3615 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3616 {
3617         struct file *file = iocb->ki_filp;
3618         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3619         struct inode *inode = file->f_mapping->host;
3620         struct cifsInodeInfo *cinode = CIFS_I(inode);
3621         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3622         ssize_t rc;
3623
3624         inode_lock(inode);
3625         /*
3626          * We need to hold the sem to be sure nobody modifies the lock
3627          * list with a brlock that prevents writing.
3628          */
3629         down_read(&cinode->lock_sem);
3630
3631         rc = generic_write_checks(iocb, from);
3632         if (rc <= 0)
3633                 goto out;
3634
3635         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3636                                      server->vals->exclusive_lock_type, 0,
3637                                      NULL, CIFS_WRITE_OP))
3638                 rc = __generic_file_write_iter(iocb, from);
3639         else
3640                 rc = -EACCES;
3641 out:
3642         up_read(&cinode->lock_sem);
3643         inode_unlock(inode);
3644
3645         if (rc > 0)
3646                 rc = generic_write_sync(iocb, rc);
3647         return rc;
3648 }
3649
3650 ssize_t
3651 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3652 {
3653         struct inode *inode = file_inode(iocb->ki_filp);
3654         struct cifsInodeInfo *cinode = CIFS_I(inode);
3655         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3656         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3657                                                 iocb->ki_filp->private_data;
3658         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3659         ssize_t written;
3660
3661         written = cifs_get_writer(cinode);
3662         if (written)
3663                 return written;
3664
3665         if (CIFS_CACHE_WRITE(cinode)) {
3666                 if (cap_unix(tcon->ses) &&
3667                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3668                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3669                         written = generic_file_write_iter(iocb, from);
3670                         goto out;
3671                 }
3672                 written = cifs_writev(iocb, from);
3673                 goto out;
3674         }
3675         /*
3676          * For non-oplocked files in strict cache mode we need to write the data
3677          * to the server exactly from pos to pos+len-1 rather than flush all
3678          * affected pages because it may cause an error with mandatory locks on
3679          * these pages but not on the region from pos to pos+len-1.
3680          */
3681         written = cifs_user_writev(iocb, from);
3682         if (CIFS_CACHE_READ(cinode)) {
3683                 /*
3684                  * We have read level caching and we have just sent a write
3685                  * request to the server thus making data in the cache stale.
3686                  * Zap the cache and set oplock/lease level to NONE to avoid
3687                  * reading stale data from the cache. All subsequent read
3688                  * operations will read new data from the server.
3689                  */
3690                 cifs_zap_mapping(inode);
3691                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3692                          inode);
3693                 cinode->oplock = 0;
3694         }
3695 out:
3696         cifs_put_writer(cinode);
3697         return written;
3698 }
3699
3700 static struct cifs_readdata *
3701 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3702 {
3703         struct cifs_readdata *rdata;
3704
3705         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3706         if (rdata != NULL) {
3707                 rdata->pages = pages;
3708                 kref_init(&rdata->refcount);
3709                 INIT_LIST_HEAD(&rdata->list);
3710                 init_completion(&rdata->done);
3711                 INIT_WORK(&rdata->work, complete);
3712         }
3713
3714         return rdata;
3715 }
3716
3717 static struct cifs_readdata *
3718 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3719 {
3720         struct page **pages =
3721                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3722         struct cifs_readdata *ret = NULL;
3723
3724         if (pages) {
3725                 ret = cifs_readdata_direct_alloc(pages, complete);
3726                 if (!ret)
3727                         kfree(pages);
3728         }
3729
3730         return ret;
3731 }
3732
3733 void
3734 cifs_readdata_release(struct kref *refcount)
3735 {
3736         struct cifs_readdata *rdata = container_of(refcount,
3737                                         struct cifs_readdata, refcount);
3738 #ifdef CONFIG_CIFS_SMB_DIRECT
3739         if (rdata->mr) {
3740                 smbd_deregister_mr(rdata->mr);
3741                 rdata->mr = NULL;
3742         }
3743 #endif
3744         if (rdata->cfile)
3745                 cifsFileInfo_put(rdata->cfile);
3746
3747         kvfree(rdata->pages);
3748         kfree(rdata);
3749 }
3750
3751 static int
3752 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3753 {
3754         int rc = 0;
3755         struct page *page;
3756         unsigned int i;
3757
3758         for (i = 0; i < nr_pages; i++) {
3759                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3760                 if (!page) {
3761                         rc = -ENOMEM;
3762                         break;
3763                 }
3764                 rdata->pages[i] = page;
3765         }
3766
3767         if (rc) {
3768                 unsigned int nr_page_failed = i;
3769
3770                 for (i = 0; i < nr_page_failed; i++) {
3771                         put_page(rdata->pages[i]);
3772                         rdata->pages[i] = NULL;
3773                 }
3774         }
3775         return rc;
3776 }
3777
3778 static void
3779 cifs_uncached_readdata_release(struct kref *refcount)
3780 {
3781         struct cifs_readdata *rdata = container_of(refcount,
3782                                         struct cifs_readdata, refcount);
3783         unsigned int i;
3784
3785         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3786         for (i = 0; i < rdata->nr_pages; i++) {
3787                 put_page(rdata->pages[i]);
3788         }
3789         cifs_readdata_release(refcount);
3790 }
3791
3792 /**
3793  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3794  * @rdata:      the readdata response with list of pages holding data
3795  * @iter:       destination for our data
3796  *
3797  * This function copies data from a list of pages in a readdata response into
3798  * an array of iovecs. It will first calculate where the data should go
3799  * based on the info in the readdata and then copy the data into that spot.
3800  */
3801 static int
3802 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3803 {
3804         size_t remaining = rdata->got_bytes;
3805         unsigned int i;
3806
3807         for (i = 0; i < rdata->nr_pages; i++) {
3808                 struct page *page = rdata->pages[i];
3809                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3810                 size_t written;
3811
3812                 if (unlikely(iov_iter_is_pipe(iter))) {
3813                         void *addr = kmap_atomic(page);
3814
3815                         written = copy_to_iter(addr, copy, iter);
3816                         kunmap_atomic(addr);
3817                 } else
3818                         written = copy_page_to_iter(page, 0, copy, iter);
3819                 remaining -= written;
3820                 if (written < copy && iov_iter_count(iter) > 0)
3821                         break;
3822         }
3823         return remaining ? -EFAULT : 0;
3824 }
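/*
 * Worked example for cifs_readdata_to_iov() above: with got_bytes = 6000
 * and PAGE_SIZE = 4096, page 0 copies min(6000, 4096) = 4096 bytes and
 * page 1 copies min(1904, 4096) = 1904, leaving remaining = 0 and a return
 * of 0. If a copy stops short while the iter still has room, the loop
 * breaks and the leftover bytes surface as -EFAULT.
 */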
3825
3826 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3827
3828 static void
3829 cifs_uncached_readv_complete(struct work_struct *work)
3830 {
3831         struct cifs_readdata *rdata = container_of(work,
3832                                                 struct cifs_readdata, work);
3833
3834         complete(&rdata->done);
3835         collect_uncached_read_data(rdata->ctx);
3836         /* the call below can possibly free the last ref to the aio ctx */
3837         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3838 }
3839
3840 static int
3841 uncached_fill_pages(struct TCP_Server_Info *server,
3842                     struct cifs_readdata *rdata, struct iov_iter *iter,
3843                     unsigned int len)
3844 {
3845         int result = 0;
3846         unsigned int i;
3847         unsigned int nr_pages = rdata->nr_pages;
3848         unsigned int page_offset = rdata->page_offset;
3849
3850         rdata->got_bytes = 0;
3851         rdata->tailsz = PAGE_SIZE;
3852         for (i = 0; i < nr_pages; i++) {
3853                 struct page *page = rdata->pages[i];
3854                 size_t n;
3855                 unsigned int segment_size = rdata->pagesz;
3856
3857                 if (i == 0)
3858                         segment_size -= page_offset;
3859                 else
3860                         page_offset = 0;
3861
3862
3863                 if (len <= 0) {
3864                         /* no need to hold page hostage */
3865                         rdata->pages[i] = NULL;
3866                         rdata->nr_pages--;
3867                         put_page(page);
3868                         continue;
3869                 }
3870
3871                 n = len;
3872                 if (len >= segment_size)
3873                         /* enough data to fill the page */
3874                         n = segment_size;
3875                 else
3876                         rdata->tailsz = len;
3877                 len -= n;
3878
3879                 if (iter)
3880                         result = copy_page_from_iter(
3881                                         page, page_offset, n, iter);
3882 #ifdef CONFIG_CIFS_SMB_DIRECT
3883                 else if (rdata->mr)
3884                         result = n;
3885 #endif
3886                 else
3887                         result = cifs_read_page_from_socket(
3888                                         server, page, page_offset, n);
3889                 if (result < 0)
3890                         break;
3891
3892                 rdata->got_bytes += result;
3893         }
3894
3895         return result != -ECONNABORTED && rdata->got_bytes > 0 ?
3896                                                 rdata->got_bytes : result;
3897 }
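/*
 * Worked example for uncached_fill_pages() above: with pagesz = PAGE_SIZE
 * = 4096, page_offset = 512 and len = 5000, page 0 takes segment_size =
 * 4096 - 512 = 3584 bytes, page 1 takes the remaining 1416 and sets tailsz
 * = 1416, and any further pages are released rather than held hostage.
 */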
3898
3899 static int
3900 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3901                               struct cifs_readdata *rdata, unsigned int len)
3902 {
3903         return uncached_fill_pages(server, rdata, NULL, len);
3904 }
3905
3906 static int
3907 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3908                               struct cifs_readdata *rdata,
3909                               struct iov_iter *iter)
3910 {
3911         return uncached_fill_pages(server, rdata, iter, iter->count);
3912 }
3913
3914 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3915                         struct list_head *rdata_list,
3916                         struct cifs_aio_ctx *ctx)
3917 {
3918         unsigned int rsize;
3919         struct cifs_credits credits;
3920         int rc;
3921         struct TCP_Server_Info *server;
3922
3923         /* XXX: should we pick a new channel here? */
3924         server = rdata->server;
3925
3926         do {
3927                 if (rdata->cfile->invalidHandle) {
3928                         rc = cifs_reopen_file(rdata->cfile, true);
3929                         if (rc == -EAGAIN)
3930                                 continue;
3931                         else if (rc)
3932                                 break;
3933                 }
3934
3935                 /*
3936                  * Wait for credits to resend this rdata.
3937                  * Note: we attempt to resend the whole rdata in one
3938                  * request, not in segments
3939                  */
3940                 do {
3941                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3942                                                 &rsize, &credits);
3943
3944                         if (rc)
3945                                 goto fail;
3946
3947                         if (rsize < rdata->bytes) {
3948                                 add_credits_and_wake_if(server, &credits, 0);
3949                                 msleep(1000);
3950                         }
3951                 } while (rsize < rdata->bytes);
3952                 rdata->credits = credits;
3953
3954                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3955                 if (!rc) {
3956                         if (rdata->cfile->invalidHandle)
3957                                 rc = -EAGAIN;
3958                         else {
3959 #ifdef CONFIG_CIFS_SMB_DIRECT
3960                                 if (rdata->mr) {
3961                                         rdata->mr->need_invalidate = true;
3962                                         smbd_deregister_mr(rdata->mr);
3963                                         rdata->mr = NULL;
3964                                 }
3965 #endif
3966                                 rc = server->ops->async_readv(rdata);
3967                         }
3968                 }
3969
3970                 /* If the read was successfully sent, we are done */
3971                 if (!rc) {
3972                         /* Add to aio pending list */
3973                         list_add_tail(&rdata->list, rdata_list);
3974                         return 0;
3975                 }
3976
3977                 /* Roll back credits and retry if needed */
3978                 add_credits_and_wake_if(server, &rdata->credits, 0);
3979         } while (rc == -EAGAIN);
3980
3981 fail:
3982         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3983         return rc;
3984 }
3985
3986 static int
3987 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3988                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3989                      struct cifs_aio_ctx *ctx)
3990 {
3991         struct cifs_readdata *rdata;
3992         unsigned int npages, rsize;
3993         struct cifs_credits credits_on_stack;
3994         struct cifs_credits *credits = &credits_on_stack;
3995         size_t cur_len;
3996         int rc;
3997         pid_t pid;
3998         struct TCP_Server_Info *server;
3999         struct page **pagevec;
4000         size_t start;
4001         struct iov_iter direct_iov = ctx->iter;
4002
4003         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4004
4005         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4006                 pid = open_file->pid;
4007         else
4008                 pid = current->tgid;
4009
4010         if (ctx->direct_io)
4011                 iov_iter_advance(&direct_iov, offset - ctx->pos);
4012
4013         do {
4014                 if (open_file->invalidHandle) {
4015                         rc = cifs_reopen_file(open_file, true);
4016                         if (rc == -EAGAIN)
4017                                 continue;
4018                         else if (rc)
4019                                 break;
4020                 }
4021
4022                 if (cifs_sb->ctx->rsize == 0)
4023                         cifs_sb->ctx->rsize =
4024                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4025                                                              cifs_sb->ctx);
4026
4027                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4028                                                    &rsize, credits);
4029                 if (rc)
4030                         break;
4031
4032                 cur_len = min_t(const size_t, len, rsize);
4033
4034                 if (ctx->direct_io) {
4035                         ssize_t result;
4036
4037                         result = iov_iter_get_pages_alloc2(
4038                                         &direct_iov, &pagevec,
4039                                         cur_len, &start);
4040                         if (result < 0) {
4041                                 cifs_dbg(VFS,
4042                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
4043                                          result, iov_iter_type(&direct_iov),
4044                                          direct_iov.iov_offset,
4045                                          direct_iov.count);
4046                                 dump_stack();
4047
4048                                 rc = result;
4049                                 add_credits_and_wake_if(server, credits, 0);
4050                                 break;
4051                         }
4052                         cur_len = (size_t)result;
4053
4054                         rdata = cifs_readdata_direct_alloc(
4055                                         pagevec, cifs_uncached_readv_complete);
4056                         if (!rdata) {
4057                                 add_credits_and_wake_if(server, credits, 0);
4058                                 rc = -ENOMEM;
4059                                 break;
4060                         }
4061
4062                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
4063                         rdata->page_offset = start;
4064                         rdata->tailsz = npages > 1 ?
4065                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
4066                                 cur_len;
4067
4068                 } else {
4069
4070                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
4071                         /* allocate a readdata struct */
4072                         rdata = cifs_readdata_alloc(npages,
4073                                             cifs_uncached_readv_complete);
4074                         if (!rdata) {
4075                                 add_credits_and_wake_if(server, credits, 0);
4076                                 rc = -ENOMEM;
4077                                 break;
4078                         }
4079
4080                         rc = cifs_read_allocate_pages(rdata, npages);
4081                         if (rc) {
4082                                 kvfree(rdata->pages);
4083                                 kfree(rdata);
4084                                 add_credits_and_wake_if(server, credits, 0);
4085                                 break;
4086                         }
4087
4088                         rdata->tailsz = PAGE_SIZE;
4089                 }
4090
4091                 rdata->server = server;
4092                 rdata->cfile = cifsFileInfo_get(open_file);
4093                 rdata->nr_pages = npages;
4094                 rdata->offset = offset;
4095                 rdata->bytes = cur_len;
4096                 rdata->pid = pid;
4097                 rdata->pagesz = PAGE_SIZE;
4098                 rdata->read_into_pages = cifs_uncached_read_into_pages;
4099                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
4100                 rdata->credits = credits_on_stack;
4101                 rdata->ctx = ctx;
4102                 kref_get(&ctx->refcount);
4103
4104                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4105
4106                 if (!rc) {
4107                         if (rdata->cfile->invalidHandle)
4108                                 rc = -EAGAIN;
4109                         else
4110                                 rc = server->ops->async_readv(rdata);
4111                 }
4112
4113                 if (rc) {
4114                         add_credits_and_wake_if(server, &rdata->credits, 0);
4115                         kref_put(&rdata->refcount,
4116                                 cifs_uncached_readdata_release);
4117                         if (rc == -EAGAIN) {
4118                                 iov_iter_revert(&direct_iov, cur_len);
4119                                 continue;
4120                         }
4121                         break;
4122                 }
4123
4124                 list_add_tail(&rdata->list, rdata_list);
4125                 offset += cur_len;
4126                 len -= cur_len;
4127         } while (len > 0);
4128
4129         return rc;
4130 }
4131
4132 static void
4133 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4134 {
4135         struct cifs_readdata *rdata, *tmp;
4136         struct iov_iter *to = &ctx->iter;
4137         struct cifs_sb_info *cifs_sb;
4138         int rc;
4139
4140         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4141
4142         mutex_lock(&ctx->aio_mutex);
4143
4144         if (list_empty(&ctx->list)) {
4145                 mutex_unlock(&ctx->aio_mutex);
4146                 return;
4147         }
4148
4149         rc = ctx->rc;
4150         /* the loop below should proceed in the order of increasing offsets */
4151 again:
4152         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4153                 if (!rc) {
4154                         if (!try_wait_for_completion(&rdata->done)) {
4155                                 mutex_unlock(&ctx->aio_mutex);
4156                                 return;
4157                         }
4158
4159                         if (rdata->result == -EAGAIN) {
4160                                 /* resend call if it's a retryable error */
4161                                 struct list_head tmp_list;
4162                                 unsigned int got_bytes = rdata->got_bytes;
4163
4164                                 list_del_init(&rdata->list);
4165                                 INIT_LIST_HEAD(&tmp_list);
4166
4167                                 /*
4168                                  * We got part of the data and then a
4169                                  * reconnect happened -- fill the buffer
4170                                  * and continue reading.
4171                                  */
4172                                 if (got_bytes && got_bytes < rdata->bytes) {
4173                                         rc = 0;
4174                                         if (!ctx->direct_io)
4175                                                 rc = cifs_readdata_to_iov(rdata, to);
4176                                         if (rc) {
4177                                                 kref_put(&rdata->refcount,
4178                                                         cifs_uncached_readdata_release);
4179                                                 continue;
4180                                         }
4181                                 }
4182
4183                                 if (ctx->direct_io) {
4184                                         /*
4185                                          * Re-use rdata as this is a
4186                                          * direct I/O
4187                                          */
4188                                         rc = cifs_resend_rdata(
4189                                                 rdata,
4190                                                 &tmp_list, ctx);
4191                                 } else {
4192                                         rc = cifs_send_async_read(
4193                                                 rdata->offset + got_bytes,
4194                                                 rdata->bytes - got_bytes,
4195                                                 rdata->cfile, cifs_sb,
4196                                                 &tmp_list, ctx);
4197
4198                                         kref_put(&rdata->refcount,
4199                                                 cifs_uncached_readdata_release);
4200                                 }
4201
4202                                 list_splice(&tmp_list, &ctx->list);
4203
4204                                 goto again;
4205                         } else if (rdata->result)
4206                                 rc = rdata->result;
4207                         else if (!ctx->direct_io)
4208                                 rc = cifs_readdata_to_iov(rdata, to);
4209
4210                         /* if there was a short read -- discard anything left */
4211                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4212                                 rc = -ENODATA;
4213
4214                         ctx->total_len += rdata->got_bytes;
4215                 }
4216                 list_del_init(&rdata->list);
4217                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
4218         }
4219
4220         if (!ctx->direct_io)
4221                 ctx->total_len = ctx->len - iov_iter_count(to);
4222
4223         /* mask nodata case */
4224         if (rc == -ENODATA)
4225                 rc = 0;
4226
4227         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4228
4229         mutex_unlock(&ctx->aio_mutex);
4230
4231         if (ctx->iocb && ctx->iocb->ki_complete)
4232                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4233         else
4234                 complete(&ctx->done);
4235 }
4236
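     /*
      * Common helper for cifs_user_readv() and cifs_direct_readv(): split
      * the request into rsize-sized async reads and either wait for the
      * result (sync kiocb) or return -EIOCBQUEUED and let
      * collect_uncached_read_data() complete the iocb later.
      */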
4237 static ssize_t __cifs_readv(
4238         struct kiocb *iocb, struct iov_iter *to, bool direct)
4239 {
4240         size_t len;
4241         struct file *file = iocb->ki_filp;
4242         struct cifs_sb_info *cifs_sb;
4243         struct cifsFileInfo *cfile;
4244         struct cifs_tcon *tcon;
4245         ssize_t rc, total_read = 0;
4246         loff_t offset = iocb->ki_pos;
4247         struct cifs_aio_ctx *ctx;
4248
4249         /*
4250          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so
4251          * fall back to the data copy read path. This could be improved
4252          * by getting pages directly in ITER_KVEC.
4253          */
4254         if (direct && iov_iter_is_kvec(to)) {
4255                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
4256                 direct = false;
4257         }
4258
4259         len = iov_iter_count(to);
4260         if (!len)
4261                 return 0;
4262
4263         cifs_sb = CIFS_FILE_SB(file);
4264         cfile = file->private_data;
4265         tcon = tlink_tcon(cfile->tlink);
4266
4267         if (!tcon->ses->server->ops->async_readv)
4268                 return -ENOSYS;
4269
4270         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4271                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4272
4273         ctx = cifs_aio_ctx_alloc();
4274         if (!ctx)
4275                 return -ENOMEM;
4276
4277         ctx->cfile = cifsFileInfo_get(cfile);
4278
4279         if (!is_sync_kiocb(iocb))
4280                 ctx->iocb = iocb;
4281
4282         if (user_backed_iter(to))
4283                 ctx->should_dirty = true;
4284
4285         if (direct) {
4286                 ctx->pos = offset;
4287                 ctx->direct_io = true;
4288                 ctx->iter = *to;
4289                 ctx->len = len;
4290         } else {
4291                 rc = setup_aio_ctx_iter(ctx, to, ITER_DEST);
4292                 if (rc) {
4293                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4294                         return rc;
4295                 }
4296                 len = ctx->len;
4297         }
4298
4299         if (direct) {
4300                 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4301                                                   offset, offset + len - 1);
4302                 if (rc) {
4303                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4304                         return -EAGAIN;
4305                 }
4306         }
4307
4308         /* grab a lock here because read response handlers can access ctx */
4309         mutex_lock(&ctx->aio_mutex);
4310
4311         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4312
4313         /* if at least one read request was sent successfully, reset rc */
4314         if (!list_empty(&ctx->list))
4315                 rc = 0;
4316
4317         mutex_unlock(&ctx->aio_mutex);
4318
4319         if (rc) {
4320                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4321                 return rc;
4322         }
4323
4324         if (!is_sync_kiocb(iocb)) {
4325                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4326                 return -EIOCBQUEUED;
4327         }
4328
4329         rc = wait_for_completion_killable(&ctx->done);
4330         if (rc) {
4331                 mutex_lock(&ctx->aio_mutex);
4332                 ctx->rc = rc = -EINTR;
4333                 total_read = ctx->total_len;
4334                 mutex_unlock(&ctx->aio_mutex);
4335         } else {
4336                 rc = ctx->rc;
4337                 total_read = ctx->total_len;
4338         }
4339
4340         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4341
4342         if (total_read) {
4343                 iocb->ki_pos += total_read;
4344                 return total_read;
4345         }
4346         return rc;
4347 }
4348
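     /* Uncached read that targets the user's buffers directly. */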
4349 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4350 {
4351         return __cifs_readv(iocb, to, true);
4352 }
4353
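     /* Uncached read that bounces the data through kernel pages. */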
4354 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4355 {
4356         return __cifs_readv(iocb, to, false);
4357 }
4358
4359 ssize_t
4360 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4361 {
4362         struct inode *inode = file_inode(iocb->ki_filp);
4363         struct cifsInodeInfo *cinode = CIFS_I(inode);
4364         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4365         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4366                                                 iocb->ki_filp->private_data;
4367         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4368         int rc = -EACCES;
4369
4370         /*
4371          * In strict cache mode we need to read from the server all the
4372          * time if we don't have a level II oplock because the server can
4373          * delay mtime changes - so we can't decide whether to invalidate
4374          * the inode. Reading pages can also fail if there are mandatory
4375          * locks on pages affected by this read but not on the region
4376          * from pos to pos+len-1.
4377          */
4378         if (!CIFS_CACHE_READ(cinode))
4379                 return cifs_user_readv(iocb, to);
4380
4381         if (cap_unix(tcon->ses) &&
4382             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4383             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4384                 return generic_file_read_iter(iocb, to);
4385
4386         /*
4387          * We need to hold the sem to be sure nobody modifies the lock
4388          * list with a brlock that prevents reading.
4389          */
4390         down_read(&cinode->lock_sem);
4391         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4392                                      tcon->ses->server->vals->shared_lock_type,
4393                                      0, NULL, CIFS_READ_OP))
4394                 rc = generic_file_read_iter(iocb, to);
4395         up_read(&cinode->lock_sem);
4396         return rc;
4397 }
4398
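     /*
      * Synchronous, uncached read used by cifs_readpage_worker(): issue
      * sync_read calls of at most rsize bytes until read_size bytes have
      * been read or an error/EOF is hit, retrying on -EAGAIN.
      */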
4399 static ssize_t
4400 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4401 {
4402         int rc = -EACCES;
4403         unsigned int bytes_read = 0;
4404         unsigned int total_read;
4405         unsigned int current_read_size;
4406         unsigned int rsize;
4407         struct cifs_sb_info *cifs_sb;
4408         struct cifs_tcon *tcon;
4409         struct TCP_Server_Info *server;
4410         unsigned int xid;
4411         char *cur_offset;
4412         struct cifsFileInfo *open_file;
4413         struct cifs_io_parms io_parms = {0};
4414         int buf_type = CIFS_NO_BUFFER;
4415         __u32 pid;
4416
4417         xid = get_xid();
4418         cifs_sb = CIFS_FILE_SB(file);
4419
4420         /* FIXME: set up handlers for larger reads and/or convert to async */
4421         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4422
4423         if (file->private_data == NULL) {
4424                 rc = -EBADF;
4425                 free_xid(xid);
4426                 return rc;
4427         }
4428         open_file = file->private_data;
4429         tcon = tlink_tcon(open_file->tlink);
4430         server = cifs_pick_channel(tcon->ses);
4431
4432         if (!server->ops->sync_read) {
4433                 free_xid(xid);
4434                 return -ENOSYS;
4435         }
4436
4437         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4438                 pid = open_file->pid;
4439         else
4440                 pid = current->tgid;
4441
4442         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4443                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4444
4445         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4446              total_read += bytes_read, cur_offset += bytes_read) {
4447                 do {
4448                         current_read_size = min_t(uint, read_size - total_read,
4449                                                   rsize);
4450                         /*
4451                          * For Windows ME and 9x we do not want to request
4452                          * more than they negotiated since they will refuse
4453                          * the read then.
4454                          */
4455                         if (!(tcon->ses->capabilities &
4456                                 tcon->ses->server->vals->cap_large_files)) {
4457                                 current_read_size = min_t(uint,
4458                                         current_read_size, CIFSMaxBufSize);
4459                         }
4460                         if (open_file->invalidHandle) {
4461                                 rc = cifs_reopen_file(open_file, true);
4462                                 if (rc != 0)
4463                                         break;
4464                         }
4465                         io_parms.pid = pid;
4466                         io_parms.tcon = tcon;
4467                         io_parms.offset = *offset;
4468                         io_parms.length = current_read_size;
4469                         io_parms.server = server;
4470                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4471                                                     &bytes_read, &cur_offset,
4472                                                     &buf_type);
4473                 } while (rc == -EAGAIN);
4474
4475                 if (rc || (bytes_read == 0)) {
4476                         if (total_read) {
4477                                 break;
4478                         } else {
4479                                 free_xid(xid);
4480                                 return rc;
4481                         }
4482                 } else {
4483                         cifs_stats_bytes_read(tcon, total_read);
4484                         *offset += bytes_read;
4485                 }
4486         }
4487         free_xid(xid);
4488         return total_read;
4489 }
4490
4491 /*
4492  * If the page is mmap'ed into a process' page tables, then we need to make
4493  * sure that it doesn't change while being written back.
4494  */
4495 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4496 {
4497         struct folio *folio = page_folio(vmf->page);
4498
4499         /* Wait for the folio to be written to the cache before we allow it to
4500          * be modified.  We then assume the entire folio will need writing back.
4501          */
4502 #ifdef CONFIG_CIFS_FSCACHE
4503         if (folio_test_fscache(folio) &&
4504             folio_wait_fscache_killable(folio) < 0)
4505                 return VM_FAULT_RETRY;
4506 #endif
4507
4508         folio_wait_writeback(folio);
4509
4510         if (folio_lock_killable(folio) < 0)
4511                 return VM_FAULT_RETRY;
4512         return VM_FAULT_LOCKED;
4513 }
4514
4515 static const struct vm_operations_struct cifs_file_vm_ops = {
4516         .fault = filemap_fault,
4517         .map_pages = filemap_map_pages,
4518         .page_mkwrite = cifs_page_mkwrite,
4519 };
4520
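     /*
      * mmap for strict cache mode: zap any stale cached pages first if we
      * do not hold a read oplock.
      */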
4521 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4522 {
4523         int xid, rc = 0;
4524         struct inode *inode = file_inode(file);
4525
4526         xid = get_xid();
4527
4528         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4529                 rc = cifs_zap_mapping(inode);
4530         if (!rc)
4531                 rc = generic_file_mmap(file, vma);
4532         if (!rc)
4533                 vma->vm_ops = &cifs_file_vm_ops;
4534
4535         free_xid(xid);
4536         return rc;
4537 }
4538
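     /* mmap for the default cache mode: revalidate the file first. */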
4539 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4540 {
4541         int rc, xid;
4542
4543         xid = get_xid();
4544
4545         rc = cifs_revalidate_file(file);
4546         if (rc)
4547                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4548                          rc);
4549         if (!rc)
4550                 rc = generic_file_mmap(file, vma);
4551         if (!rc)
4552                 vma->vm_ops = &cifs_file_vm_ops;
4553
4554         free_xid(xid);
4555         return rc;
4556 }
4557
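     /*
      * Completion work for an async readahead request: mark the pages
      * uptodate if the read succeeded (or returned -EAGAIN after receiving
      * some data), hand them to fscache, then unlock and release them.
      */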
4558 static void
4559 cifs_readv_complete(struct work_struct *work)
4560 {
4561         unsigned int i, got_bytes;
4562         struct cifs_readdata *rdata = container_of(work,
4563                                                 struct cifs_readdata, work);
4564
4565         got_bytes = rdata->got_bytes;
4566         for (i = 0; i < rdata->nr_pages; i++) {
4567                 struct page *page = rdata->pages[i];
4568
4569                 if (rdata->result == 0 ||
4570                     (rdata->result == -EAGAIN && got_bytes)) {
4571                         flush_dcache_page(page);
4572                         SetPageUptodate(page);
4573
4574                         /* send the page to the cache too */
4575                         cifs_readpage_to_fscache(rdata->mapping->host,
4576                                                  page);
4577                 } else
4578                         SetPageError(page);
4579
4580                 unlock_page(page);
4581
4582                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4583
4584                 put_page(page);
4585                 rdata->pages[i] = NULL;
4586         }
4587         kref_put(&rdata->refcount, cifs_readdata_release);
4588 }
4589
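     /*
      * Fill the pages attached to @rdata with up to @len bytes of data,
      * either copied from @iter (when the payload has already been
      * received, e.g. after decryption) or read straight from the server
      * socket. Pages beyond the data or past the server's EOF are zeroed
      * or released as appropriate.
      */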
4590 static int
4591 readpages_fill_pages(struct TCP_Server_Info *server,
4592                      struct cifs_readdata *rdata, struct iov_iter *iter,
4593                      unsigned int len)
4594 {
4595         int result = 0;
4596         unsigned int i;
4597         u64 eof;
4598         pgoff_t eof_index;
4599         unsigned int nr_pages = rdata->nr_pages;
4600         unsigned int page_offset = rdata->page_offset;
4601
4602         /* determine the eof that the server (probably) has */
4603         eof = CIFS_I(rdata->mapping->host)->server_eof;
4604         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4605         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4606
4607         rdata->got_bytes = 0;
4608         rdata->tailsz = PAGE_SIZE;
4609         for (i = 0; i < nr_pages; i++) {
4610                 struct page *page = rdata->pages[i];
4611                 unsigned int to_read = rdata->pagesz;
4612                 size_t n;
4613
4614                 if (i == 0)
4615                         to_read -= page_offset;
4616                 else
4617                         page_offset = 0;
4618
4619                 n = to_read;
4620
4621                 if (len >= to_read) {
4622                         len -= to_read;
4623                 } else if (len > 0) {
4624                         /* enough for partial page, fill and zero the rest */
4625                         zero_user(page, len + page_offset, to_read - len);
4626                         n = rdata->tailsz = len;
4627                         len = 0;
4628                 } else if (page->index > eof_index) {
4629                         /*
4630                          * The VFS will not try to do readahead past the
4631                          * i_size, but it's possible that we have outstanding
4632                          * writes with gaps in the middle and the i_size hasn't
4633                          * caught up yet. Populate those with zeroed out pages
4634                          * to prevent the VFS from repeatedly attempting to
4635                          * fill them until the writes are flushed.
4636                          */
4637                         zero_user(page, 0, PAGE_SIZE);
4638                         flush_dcache_page(page);
4639                         SetPageUptodate(page);
4640                         unlock_page(page);
4641                         put_page(page);
4642                         rdata->pages[i] = NULL;
4643                         rdata->nr_pages--;
4644                         continue;
4645                 } else {
4646                         /* no need to hold page hostage */
4647                         unlock_page(page);
4648                         put_page(page);
4649                         rdata->pages[i] = NULL;
4650                         rdata->nr_pages--;
4651                         continue;
4652                 }
4653
4654                 if (iter)
4655                         result = copy_page_from_iter(
4656                                         page, page_offset, n, iter);
4657 #ifdef CONFIG_CIFS_SMB_DIRECT
4658                 else if (rdata->mr)
4659                         result = n;
4660 #endif
4661                 else
4662                         result = cifs_read_page_from_socket(
4663                                         server, page, page_offset, n);
4664                 if (result < 0)
4665                         break;
4666
4667                 rdata->got_bytes += result;
4668         }
4669
4670         return result != -ECONNABORTED && rdata->got_bytes > 0 ?
4671                                                 rdata->got_bytes : result;
4672 }
4673
4674 static int
4675 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4676                                struct cifs_readdata *rdata, unsigned int len)
4677 {
4678         return readpages_fill_pages(server, rdata, NULL, len);
4679 }
4680
4681 static int
4682 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4683                                struct cifs_readdata *rdata,
4684                                struct iov_iter *iter)
4685 {
4686         return readpages_fill_pages(server, rdata, iter, iter->count);
4687 }
4688
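     /*
      * Issue readahead against the server: satisfy what we can from
      * fscache, then chop the rest of the window into rsize-sized async
      * read requests, throttled by the credits the server grants us.
      */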
4689 static void cifs_readahead(struct readahead_control *ractl)
4690 {
4691         int rc;
4692         struct cifsFileInfo *open_file = ractl->file->private_data;
4693         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4694         struct TCP_Server_Info *server;
4695         pid_t pid;
4696         unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0;
4697         pgoff_t next_cached = ULONG_MAX;
4698         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4699                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4700         bool check_cache = caching;
4701
4702         xid = get_xid();
4703
4704         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4705                 pid = open_file->pid;
4706         else
4707                 pid = current->tgid;
4708
4709         rc = 0;
4710         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4711
4712         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4713                  __func__, ractl->file, ractl->mapping, readahead_count(ractl));
4714
4715         /*
4716          * Chop the readahead request up into rsize-sized read requests.
4717          */
4718         while ((nr_pages = readahead_count(ractl) - last_batch_size)) {
4719                 unsigned int i, got, rsize;
4720                 struct page *page;
4721                 struct cifs_readdata *rdata;
4722                 struct cifs_credits credits_on_stack;
4723                 struct cifs_credits *credits = &credits_on_stack;
4724                 pgoff_t index = readahead_index(ractl) + last_batch_size;
4725
4726                 /*
4727                  * Find out if we have anything cached in the range of
4728                  * interest, and if so, where the next chunk of cached data is.
4729                  */
4730                 if (caching) {
4731                         if (check_cache) {
4732                                 rc = cifs_fscache_query_occupancy(
4733                                         ractl->mapping->host, index, nr_pages,
4734                                         &next_cached, &cache_nr_pages);
4735                                 if (rc < 0)
4736                                         caching = false;
4737                                 check_cache = false;
4738                         }
4739
4740                         if (index == next_cached) {
4741                                 /*
4742                                  * TODO: Send a whole batch of pages to be read
4743                                  * by the cache.
4744                                  */
4745                                 struct folio *folio = readahead_folio(ractl);
4746
4747                                 last_batch_size = folio_nr_pages(folio);
4748                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4749                                                                &folio->page) < 0) {
4750                                         /*
4751                                          * TODO: Deal with cache read failure
4752                                          * here, but for the moment, delegate
4753                                          * that to readpage.
4754                                          */
4755                                         caching = false;
4756                                 }
4757                                 folio_unlock(folio);
4758                                 next_cached++;
4759                                 cache_nr_pages--;
4760                                 if (cache_nr_pages == 0)
4761                                         check_cache = true;
4762                                 continue;
4763                         }
4764                 }
4765
4766                 if (open_file->invalidHandle) {
4767                         rc = cifs_reopen_file(open_file, true);
4768                         if (rc) {
4769                                 if (rc == -EAGAIN)
4770                                         continue;
4771                                 break;
4772                         }
4773                 }
4774
4775                 if (cifs_sb->ctx->rsize == 0)
4776                         cifs_sb->ctx->rsize =
4777                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4778                                                              cifs_sb->ctx);
4779
4780                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4781                                                    &rsize, credits);
4782                 if (rc)
4783                         break;
4784                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
4785                 nr_pages = min_t(size_t, nr_pages, next_cached - index);
4786
4787                 /*
4788                  * Give up immediately if rsize is too small to read an entire
4789                  * page. The VFS will fall back to readpage. We should never
4790                  * reach this point however since we set ra_pages to 0 when the
4791                  * rsize is smaller than a cache page.
4792                  */
4793                 if (unlikely(!nr_pages)) {
4794                         add_credits_and_wake_if(server, credits, 0);
4795                         break;
4796                 }
4797
4798                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4799                 if (!rdata) {
4800                         /* best to give up if we're out of mem */
4801                         add_credits_and_wake_if(server, credits, 0);
4802                         break;
4803                 }
4804
4805                 got = __readahead_batch(ractl, rdata->pages, nr_pages);
4806                 if (got != nr_pages) {
4807                         pr_warn("__readahead_batch() returned %u/%u\n",
4808                                 got, nr_pages);
4809                         nr_pages = got;
4810                 }
4811
4812                 rdata->nr_pages = nr_pages;
4813                 rdata->bytes    = readahead_batch_length(ractl);
4814                 rdata->cfile    = cifsFileInfo_get(open_file);
4815                 rdata->server   = server;
4816                 rdata->mapping  = ractl->mapping;
4817                 rdata->offset   = readahead_pos(ractl);
4818                 rdata->pid      = pid;
4819                 rdata->pagesz   = PAGE_SIZE;
4820                 rdata->tailsz   = PAGE_SIZE;
4821                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4822                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4823                 rdata->credits  = credits_on_stack;
4824
4825                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4826                 if (!rc) {
4827                         if (rdata->cfile->invalidHandle)
4828                                 rc = -EAGAIN;
4829                         else
4830                                 rc = server->ops->async_readv(rdata);
4831                 }
4832
4833                 if (rc) {
4834                         add_credits_and_wake_if(server, &rdata->credits, 0);
4835                         for (i = 0; i < rdata->nr_pages; i++) {
4836                                 page = rdata->pages[i];
4837                                 unlock_page(page);
4838                                 put_page(page);
4839                         }
4840                         /* Fallback to the readpage in error/reconnect cases */
4841                         kref_put(&rdata->refcount, cifs_readdata_release);
4842                         break;
4843                 }
4844
4845                 kref_put(&rdata->refcount, cifs_readdata_release);
4846                 last_batch_size = nr_pages;
4847         }
4848
4849         free_xid(xid);
4850 }
4851
4852 /*
4853  * cifs_readpage_worker must be called with the page pinned
4854  */
4855 static int cifs_readpage_worker(struct file *file, struct page *page,
4856         loff_t *poffset)
4857 {
4858         char *read_data;
4859         int rc;
4860
4861         /* Is the page cached? */
4862         rc = cifs_readpage_from_fscache(file_inode(file), page);
4863         if (rc == 0)
4864                 goto read_complete;
4865
4866         read_data = kmap(page);
4867         /* for reads over a certain size we could initiate async read ahead */
4868
4869         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4870
4871         if (rc < 0)
4872                 goto io_error;
4873         else
4874                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4875
4876         /* we do not want atime to be less than mtime; it broke some apps */
4877         file_inode(file)->i_atime = current_time(file_inode(file));
4878         if (timespec64_compare(&(file_inode(file)->i_atime),
4879                                &(file_inode(file)->i_mtime)) < 0)
4880                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4882
4883         if (PAGE_SIZE > rc)
4884                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4885
4886         flush_dcache_page(page);
4887         SetPageUptodate(page);
4888
4889         /* send this page to the cache */
4890         cifs_readpage_to_fscache(file_inode(file), page);
4891
4892         rc = 0;
4893
4894 io_error:
4895         kunmap(page);
4896         unlock_page(page);
4897
4898 read_complete:
4899         return rc;
4900 }
4901
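     /* ->read_folio: fill a single page, trying fscache before the server. */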
4902 static int cifs_read_folio(struct file *file, struct folio *folio)
4903 {
4904         struct page *page = &folio->page;
4905         loff_t offset = page_file_offset(page);
4906         int rc = -EACCES;
4907         unsigned int xid;
4908
4909         xid = get_xid();
4910
4911         if (file->private_data == NULL) {
4912                 rc = -EBADF;
4913                 free_xid(xid);
4914                 return rc;
4915         }
4916
4917         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4918                  page, (int)offset, (int)offset);
4919
4920         rc = cifs_readpage_worker(file, page, &offset);
4921
4922         free_xid(xid);
4923         return rc;
4924 }
4925
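     /* Return 1 if any open file on this inode was opened for writing. */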
4926 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4927 {
4928         struct cifsFileInfo *open_file;
4929
4930         spin_lock(&cifs_inode->open_file_lock);
4931         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4932                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4933                         spin_unlock(&cifs_inode->open_file_lock);
4934                         return 1;
4935                 }
4936         }
4937         spin_unlock(&cifs_inode->open_file_lock);
4938         return 0;
4939 }
4940
4941 /* We do not want to update the file size from server for inodes
4942    open for write - to avoid races with writepage extending the
4943    file. In the future we could consider allowing refreshing the
4944    inode only on increases in the file size, but this is tricky to
4945    do without racing with writebehind page caching in the current
4946    Linux kernel design. */
4947 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4948 {
4949         if (!cifsInode)
4950                 return true;
4951
4952         if (is_inode_writable(cifsInode)) {
4953                 /* This inode is open for write at least once */
4954                 struct cifs_sb_info *cifs_sb;
4955
4956                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4957                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4958                         /* since there is no page cache to corrupt on
4959                            directio we can change size safely */
4960                         return true;
4961                 }
4962
4963                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4964                         return true;
4965
4966                 return false;
4967         } else
4968                 return true;
4969 }
4970
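     /*
      * Prepare a page for a buffered write. Bring it uptodate by reading
      * from the server first, unless the write covers the whole page, the
      * file was opened write-only, or we hold a read oplock and the page
      * lies at or beyond EOF (in which case the uncovered parts are just
      * zeroed).
      */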
4971 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4972                         loff_t pos, unsigned len,
4973                         struct page **pagep, void **fsdata)
4974 {
4975         int oncethru = 0;
4976         pgoff_t index = pos >> PAGE_SHIFT;
4977         loff_t offset = pos & (PAGE_SIZE - 1);
4978         loff_t page_start = pos & PAGE_MASK;
4979         loff_t i_size;
4980         struct page *page;
4981         int rc = 0;
4982
4983         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4984
4985 start:
4986         page = grab_cache_page_write_begin(mapping, index);
4987         if (!page) {
4988                 rc = -ENOMEM;
4989                 goto out;
4990         }
4991
4992         if (PageUptodate(page))
4993                 goto out;
4994
4995         /*
4996          * If we write a full page it will be up to date, no need to read from
4997          * the server. If the write is short, we'll end up doing a sync write
4998          * instead.
4999          */
5000         if (len == PAGE_SIZE)
5001                 goto out;
5002
5003         /*
5004          * optimize away the read when we have an oplock, and we're not
5005          * expecting to use any of the data we'd be reading in. That
5006          * is, when the page lies beyond the EOF, or straddles the EOF
5007          * and the write will cover all of the existing data.
5008          */
5009         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
5010                 i_size = i_size_read(mapping->host);
5011                 if (page_start >= i_size ||
5012                     (offset == 0 && (pos + len) >= i_size)) {
5013                         zero_user_segments(page, 0, offset,
5014                                            offset + len,
5015                                            PAGE_SIZE);
5016                         /*
5017                          * PageChecked means that the parts of the page
5018                          * to which we're not writing are considered up
5019                          * to date. Once the data is copied to the
5020                          * page, it can be set uptodate.
5021                          */
5022                         SetPageChecked(page);
5023                         goto out;
5024                 }
5025         }
5026
5027         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
5028                 /*
5029                  * might as well read a page, it is fast enough. If we get
5030                  * an error, we don't need to return it. cifs_write_end will
5031                  * do a sync write instead since PG_uptodate isn't set.
5032                  */
5033                 cifs_readpage_worker(file, page, &page_start);
5034                 put_page(page);
5035                 oncethru = 1;
5036                 goto start;
5037         } else {
5038                 /* we could try using another file handle if there is one -
5039                    but how would we lock it to prevent a close of that
5040                    handle racing with this read? In any case this will be
5041                    written out by write_end so it is fine */
5042         }
5043 out:
5044         *pagep = page;
5045         return rc;
5046 }
5047
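     /*
      * Allow the VM to release a folio: refuse if it has private data,
      * otherwise wait for any fscache write-out (unless we must not block).
      */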
5048 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
5049 {
5050         if (folio_test_private(folio))
5051                 return false;
5052         if (folio_test_fscache(folio)) {
5053                 if (current_is_kswapd() || !(gfp & __GFP_FS))
5054                         return false;
5055                 folio_wait_fscache(folio);
5056         }
5057         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
5058         return true;
5059 }
5060
5061 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
5062                                  size_t length)
5063 {
5064         folio_wait_fscache(folio);
5065 }
5066
5067 static int cifs_launder_folio(struct folio *folio)
5068 {
5069         int rc = 0;
5070         loff_t range_start = folio_pos(folio);
5071         loff_t range_end = range_start + folio_size(folio);
5072         struct writeback_control wbc = {
5073                 .sync_mode = WB_SYNC_ALL,
5074                 .nr_to_write = 0,
5075                 .range_start = range_start,
5076                 .range_end = range_end,
5077         };
5078
5079         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5080
5081         if (folio_clear_dirty_for_io(folio))
5082                 rc = cifs_writepage_locked(&folio->page, &wbc);
5083
5084         folio_wait_fscache(folio);
5085         return rc;
5086 }
5087
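     /*
      * Handle an oplock break from the server: downgrade the cached oplock
      * state, flush (and, if required, purge) the page cache, push cached
      * byte-range locks back to the server, and acknowledge the break
      * unless it was cancelled by a reconnect.
      */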
5088 void cifs_oplock_break(struct work_struct *work)
5089 {
5090         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5091                                                   oplock_break);
5092         struct inode *inode = d_inode(cfile->dentry);
5093         struct cifsInodeInfo *cinode = CIFS_I(inode);
5094         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
5095         struct TCP_Server_Info *server = tcon->ses->server;
5096         int rc = 0;
5097         bool purge_cache = false;
5098
5099         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5100                         TASK_UNINTERRUPTIBLE);
5101
5102         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5103                                       cfile->oplock_epoch, &purge_cache);
5104
5105         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5106                                                 cifs_has_mand_locks(cinode)) {
5107                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5108                          inode);
5109                 cinode->oplock = 0;
5110         }
5111
5112         if (inode && S_ISREG(inode->i_mode)) {
5113                 if (CIFS_CACHE_READ(cinode))
5114                         break_lease(inode, O_RDONLY);
5115                 else
5116                         break_lease(inode, O_WRONLY);
5117                 rc = filemap_fdatawrite(inode->i_mapping);
5118                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5119                         rc = filemap_fdatawait(inode->i_mapping);
5120                         mapping_set_error(inode->i_mapping, rc);
5121                         cifs_zap_mapping(inode);
5122                 }
5123                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5124                 if (CIFS_CACHE_WRITE(cinode))
5125                         goto oplock_break_ack;
5126         }
5127
5128         rc = cifs_push_locks(cfile);
5129         if (rc)
5130                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5131
5132 oplock_break_ack:
5133         /*
5134          * Releasing a stale oplock after a recent reconnect of the smb
5135          * session (using a now incorrect file handle) is not a data
5136          * integrity issue, but don't bother sending an oplock release if
5137          * the session is still disconnected: the server already released it.
5138          */
5139         if (!cfile->oplock_break_cancelled) {
5140                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
5141                                                              cinode);
5142                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5143         }
5144
5145         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5146         cifs_done_oplock_break(cinode);
5147 }
5148
5149 /*
5150  * The presence of cifs_direct_io() in the address space ops vector
5151  * allows open() O_DIRECT flags which would have failed otherwise.
5152  *
5153  * In the non-cached mode (mount with cache=none), we shunt off direct
5154  * read and write requests, so this method should never be called.
5155  *
5156  * Direct IO is not yet supported in the cached mode.
5157  */
5158 static ssize_t
5159 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5160 {
5161         /*
5162          * FIXME
5163          * Eventually need to support direct IO for non forcedirectio mounts
5164          */
5165         return -EINVAL;
5166 }
5167
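     /*
      * Allow a CIFS file with no holes to back swap space; swap I/O is
      * then routed through ->swap_rw.
      */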
5168 static int cifs_swap_activate(struct swap_info_struct *sis,
5169                               struct file *swap_file, sector_t *span)
5170 {
5171         struct cifsFileInfo *cfile = swap_file->private_data;
5172         struct inode *inode = swap_file->f_mapping->host;
5173         unsigned long blocks;
5174         long long isize;
5175
5176         cifs_dbg(FYI, "swap activate\n");
5177
5178         if (!swap_file->f_mapping->a_ops->swap_rw)
5179                 /* Cannot support swap */
5180                 return -EINVAL;
5181
5182         spin_lock(&inode->i_lock);
5183         blocks = inode->i_blocks;
5184         isize = inode->i_size;
5185         spin_unlock(&inode->i_lock);
5186         if (blocks * 512 < isize) {
5187                 pr_warn("swap activate: swapfile has holes\n");
5188                 return -EINVAL;
5189         }
5190         *span = sis->pages;
5191
5192         pr_warn_once("Swap support over SMB3 is experimental\n");
5193
5194         /*
5195          * TODO: consider adding ACL (or documenting how) to prevent other
5196          * users (on this or other systems) from reading it
5197          */
5198
5199
5200         /* TODO: add sk_set_memalloc(inet) or similar */
5201
5202         if (cfile)
5203                 cfile->swapfile = true;
5204         /*
5205          * TODO: Since file already open, we can't open with DENY_ALL here
5206          * but we could add call to grab a byte range lock to prevent others
5207          * from reading or writing the file
5208          */
5209
5210         sis->flags |= SWP_FS_OPS;
5211         return add_swap_extent(sis, 0, sis->max, 0);
5212 }
5213
5214 static void cifs_swap_deactivate(struct file *file)
5215 {
5216         struct cifsFileInfo *cfile = file->private_data;
5217
5218         cifs_dbg(FYI, "swap deactivate\n");
5219
5220         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5221
5222         if (cfile)
5223                 cfile->swapfile = false;
5224
5225         /* do we need to unpin (or unlock) the file */
5226 }
5227
5228 /*
5229  * Mark a page as having been made dirty and thus needing writeback.  We also
5230  * need to pin the cache object to write back to.
5231  */
5232 #ifdef CONFIG_CIFS_FSCACHE
5233 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5234 {
5235         return fscache_dirty_folio(mapping, folio,
5236                                         cifs_inode_cookie(mapping->host));
5237 }
5238 #else
5239 #define cifs_dirty_folio filemap_dirty_folio
5240 #endif
5241
5242 const struct address_space_operations cifs_addr_ops = {
5243         .read_folio = cifs_read_folio,
5244         .readahead = cifs_readahead,
5245         .writepages = cifs_writepages,
5246         .write_begin = cifs_write_begin,
5247         .write_end = cifs_write_end,
5248         .dirty_folio = cifs_dirty_folio,
5249         .release_folio = cifs_release_folio,
5250         .direct_IO = cifs_direct_io,
5251         .invalidate_folio = cifs_invalidate_folio,
5252         .launder_folio = cifs_launder_folio,
5253         .migrate_folio = filemap_migrate_folio,
5254         /*
5255          * TODO: investigate and if useful we could add an is_dirty_writeback
5256          * helper if needed
5257          */
5258         .swap_activate = cifs_swap_activate,
5259         .swap_deactivate = cifs_swap_deactivate,
5260 };
5261
5262 /*
5263  * cifs_readahead requires the server to support a buffer large enough to
5264  * contain the header plus one complete page of data.  Otherwise, we need
5265  * to leave cifs_readahead out of the address space operations.
5266  */
5267 const struct address_space_operations cifs_addr_ops_smallbuf = {
5268         .read_folio = cifs_read_folio,
5269         .writepages = cifs_writepages,
5270         .write_begin = cifs_write_begin,
5271         .write_end = cifs_write_end,
5272         .dirty_folio = cifs_dirty_folio,
5273         .release_folio = cifs_release_folio,
5274         .invalidate_folio = cifs_invalidate_folio,
5275         .launder_folio = cifs_launder_folio,
5276         .migrate_folio = filemap_migrate_folio,
5277 };
5278
5279 /*
5280  * Splice data from a file into a pipe.
5281  */
5282 ssize_t cifs_splice_read(struct file *in, loff_t *ppos,
5283                          struct pipe_inode_info *pipe, size_t len,
5284                          unsigned int flags)
5285 {
5286         if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes))
5287                 return 0;
5288         if (unlikely(!len))
5289                 return 0;
5290         if (in->f_flags & O_DIRECT)
5291                 return direct_splice_read(in, ppos, pipe, len, flags);
5292         return filemap_splice_read(in, ppos, pipe, len, flags);
5293 }