s3:lib/recvfile: make use of F_SETPIPE_SZ and reduce the splice syscalls.
[metze/samba/wip.git] / source3 / lib / recvfile.c
1 /*
2  Unix SMB/Netbios implementation.
3  Version 3.2.x
4  recvfile implementations.
5  Copyright (C) Jeremy Allison 2007.
6
7  This program is free software; you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation; either version 3 of the License, or
10  (at your option) any later version.
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  GNU General Public License for more details.
15
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 /*
21  * This file handles the OS dependent recvfile implementations.
22  * The API is such that it returns -1 on error, else returns the
23  * number of bytes written.
24  */
25
26 #include "includes.h"
27 #include "system/filesys.h"
28 #include "lib/util/sys_rw.h"
29
30 /* Do this on our own in TRANSFER_BUF_SIZE chunks.
31  * It's safe to make direct syscalls to lseek/write here
32  * as we're below the Samba vfs layer.
33  *
34  * Returns -1 on short reads from fromfd (read error)
35  * and sets errno.
36  *
37  * Otherwise returns the number of bytes written to 'tofd'.
38  * If the return value != count, errno is set.
39  * Returns count if complete success.
40  */
41
42 #ifndef TRANSFER_BUF_SIZE
43 #define TRANSFER_BUF_SIZE (128*1024)
44 #endif
45
46 static ssize_t default_sys_recvfile(int fromfd,
47                         int tofd,
48                         off_t offset,
49                         size_t count)
50 {
51         int saved_errno = 0;
52         size_t total = 0;
53         size_t bufsize = MIN(TRANSFER_BUF_SIZE,count);
54         size_t total_written = 0;
55         char buffer[bufsize];
56
57         DEBUG(10,("default_sys_recvfile: from = %d, to = %d, "
58                 "offset=%.0f, count = %lu\n",
59                 fromfd, tofd, (double)offset,
60                 (unsigned long)count));
61
62         if (count == 0) {
63                 return 0;
64         }
65
66         if (tofd != -1 && offset != (off_t)-1) {
67                 if (lseek(tofd, offset, SEEK_SET) == -1) {
68                         if (errno != ESPIPE) {
69                                 return -1;
70                         }
71                 }
72         }
73
74         while (total < count) {
75                 size_t num_written = 0;
76                 ssize_t read_ret;
77                 size_t toread = MIN(bufsize,count - total);
78
79                 /*
80                  * Read from socket - ignore EINTR.
81                  * Can't use sys_read() as that also
82                  * ignores EAGAIN and EWOULDBLOCK.
83                  */
84                 do {
85                         read_ret = read(fromfd, buffer, toread);
86                 } while (read_ret == -1 && errno == EINTR);
87
88                 if (read_ret == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
89                         /*
90                          * fromfd socket is in non-blocking mode.
91                          * If we already read some and wrote
92                          * it successfully, return that.
93                          * Only return -1 if this is the first read
94                          * attempt. Caller will handle both cases.
95                          */
96                         if (total_written != 0) {
97                                 return total_written;
98                         }
99                         return -1;
100                 }
101
102                 if (read_ret <= 0) {
103                         /* EOF or socket error. */
104                         return -1;
105                 }
106
107                 num_written = 0;
108
109                 /* Don't write any more after a write error. */
110                 while (tofd != -1 && (num_written < read_ret)) {
111                         ssize_t write_ret;
112
113                         /* Write to file - ignore EINTR. */
114                         write_ret = sys_write(tofd,
115                                         buffer + num_written,
116                                         read_ret - num_written);
117
118                         if (write_ret <= 0) {
119                                 /* write error - stop writing. */
120                                 tofd = -1;
121                                 if (total_written == 0) {
122                                         /* Ensure we return
123                                            -1 if the first
124                                            write failed. */
125                                         total_written = -1;
126                                 }
127                                 saved_errno = errno;
128                                 break;
129                         }
130
131                         num_written += (size_t)write_ret;
132                         total_written += (size_t)write_ret;
133                 }
134
135                 total += read_ret;
136         }
137
138         if (saved_errno) {
139                 /* Return the correct write error. */
140                 errno = saved_errno;
141         }
142         return (ssize_t)total_written;
143 }
144
145 #if defined(HAVE_LINUX_SPLICE)
146
147 /*
148  * Try and use the Linux system call to do this.
149  * Remember we only return -1 if the socket read
150  * failed. Else we return the number of bytes
151  * actually written. We always read count bytes
152  * from the network in the case of return != -1.
153  */
154
155
156 ssize_t sys_recvfile(int fromfd,
157                         int tofd,
158                         off_t offset,
159                         size_t count)
160 {
161         static int pipefd[2] = { -1, -1 };
162         static bool try_splice_call = false;
163         static size_t chunk_size = 16384;
164         size_t total_written = 0;
165         loff_t splice_offset = offset;
166
167         DEBUG(10,("sys_recvfile: from = %d, to = %d, "
168                 "offset=%.0f, count = %lu\n",
169                 fromfd, tofd, (double)offset,
170                 (unsigned long)count));
171
172         if (count == 0) {
173                 return 0;
174         }
175
176         /*
177          * Older Linux kernels have splice for sendfile,
178          * but it fails for recvfile. Ensure we only try
179          * this once and always fall back to the userspace
180          * implementation if recvfile splice fails. JRA.
181          */
182
183         if (!try_splice_call) {
184                 return default_sys_recvfile(fromfd,
185                                 tofd,
186                                 offset,
187                                 count);
188         }
189
190         if (pipefd[0] == -1) {
191                 int ret;
192
193                 ret = pipe(pipefd);
194                 if (ret == -1) {
195                         try_splice_call = false;
196                         return default_sys_recvfile(fromfd, tofd, offset, count);
197                 }
198
199 #ifdef F_SETPIPE_SZ
200                 fcntl(pipefd[1], F_SETPIPE_SZ, 1048576);
201                 ret = fcntl(pipefd[1], F_GETPIPE_SZ);
202                 if (ret > chunk_size) {
203                         chunk_size = ret;
204                 }
205 #endif
206         }
207
208         while (count > 0) {
209                 int nread, to_write;
210
211                 nread = splice(fromfd, NULL, pipefd[1], NULL,
212                                MIN(count, chunk_size), SPLICE_F_MOVE);
213                 if (nread == -1) {
214                         if (errno == EINTR) {
215                                 continue;
216                         }
217                         if (total_written == 0 &&
218                             (errno == EBADF || errno == EINVAL)) {
219                                 try_splice_call = false;
220                                 return default_sys_recvfile(fromfd, tofd,
221                                                             offset, count);
222                         }
223                         if (errno == EAGAIN || errno == EWOULDBLOCK) {
224                                 /*
225                                  * fromfd socket is in non-blocking mode.
226                                  * If we already read some and wrote
227                                  * it successfully, return that.
228                                  * Only return -1 if this is the first read
229                                  * attempt. Caller will handle both cases.
230                                  */
231                                 if (total_written != 0) {
232                                         return total_written;
233                                 }
234                                 return -1;
235                         }
236                         break;
237                 }
238
239                 to_write = nread;
240                 while (to_write > 0) {
241                         int thistime;
242                         thistime = splice(pipefd[0], NULL, tofd,
243                                           &splice_offset, to_write,
244                                           SPLICE_F_MOVE);
245                         if (thistime == -1) {
246                                 goto done;
247                         }
248                         to_write -= thistime;
249                 }
250
251                 total_written += nread;
252                 count -= nread;
253         }
254
255  done:
256         if (count) {
257                 int saved_errno = errno;
258                 if (drain_socket(fromfd, count) != count) {
259                         /* socket is dead. */
260                         return -1;
261                 }
262                 errno = saved_errno;
263         }
264
265         return total_written;
266 }
267 #else
268
269 /*****************************************************************
270  No recvfile system call - use the default userspace implementation
270  (copies in TRANSFER_BUF_SIZE chunks, 128k unless overridden).
271 *****************************************************************/
272
ssize_t sys_recvfile(int fromfd,
                        int tofd,
                        off_t offset,
                        size_t count)
{
        /* Nothing OS specific available: hand over to the generic copy loop. */
        ssize_t result = default_sys_recvfile(fromfd, tofd, offset, count);

        return result;
}
280 #endif
281
282 /*****************************************************************
283  Throw away "count" bytes from the client socket.
284  Returns count or -1 on error.
285  Must only operate on a blocking socket.
286 *****************************************************************/
287
288 ssize_t drain_socket(int sockfd, size_t count)
289 {
290         size_t total = 0;
291         size_t bufsize = MIN(TRANSFER_BUF_SIZE,count);
292         char buffer[bufsize];
293         int old_flags = 0;
294
295         if (count == 0) {
296                 return 0;
297         }
298
299         old_flags = fcntl(sockfd, F_GETFL, 0);
300         if (set_blocking(sockfd, true) == -1) {
301                 return -1;
302         }
303
304         while (total < count) {
305                 ssize_t read_ret;
306                 size_t toread = MIN(bufsize,count - total);
307
308                 /* Read from socket - ignore EINTR. */
309                 read_ret = sys_read(sockfd, buffer, toread);
310                 if (read_ret <= 0) {
311                         /* EOF or socket error. */
312                         count = (size_t)-1;
313                         goto out;
314                 }
315                 total += read_ret;
316         }
317
318   out:
319
320         if (fcntl(sockfd, F_SETFL, old_flags) == -1) {
321                 return -1;
322         }
323         return count;
324 }