Annotation of embedaddon/lighttpd/src/network_writev.c, revision 1.1.1.2
1.1 misho 1: #include "network_backends.h"
2:
3: #ifdef USE_WRITEV
4:
5: #include "network.h"
6: #include "fdevent.h"
7: #include "log.h"
8: #include "stat_cache.h"
9:
10: #include <sys/types.h>
11: #include <sys/socket.h>
12: #include <sys/uio.h>
13: #include <sys/stat.h>
14: #include <sys/time.h>
15: #include <sys/resource.h>
16: #include <netinet/in.h>
17: #include <netinet/tcp.h>
18:
19: #include <errno.h>
20: #include <fcntl.h>
21: #include <unistd.h>
22: #include <netdb.h>
23: #include <string.h>
24: #include <stdlib.h>
25: #include <limits.h>
26: #include <stdio.h>
27: #include <assert.h>
28:
29: #if 0
30: #define LOCAL_BUFFERING 1
31: #endif
32:
33: #if defined(UIO_MAXIOV)
34: # define MAX_CHUNKS UIO_MAXIOV
35: #elif defined(IOV_MAX)
36: /* new name for UIO_MAXIOV since IEEE Std 1003.1-2001 */
37: # define MAX_CHUNKS IOV_MAX
38: #elif defined(_XOPEN_IOV_MAX)
39: /* minimum value for sysconf(_SC_IOV_MAX); posix requires this to be at least 16, which is good enough - no need to call sysconf() */
40: # define MAX_CHUNKS _XOPEN_IOV_MAX
41: #else
42: # error neither UIO_MAXIOV nor IOV_MAX nor _XOPEN_IOV_MAX are defined
43: #endif
44:
45: int network_write_chunkqueue_writev(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) {
46: chunk *c;
47:
48: for(c = cq->first; (max_bytes > 0) && (NULL != c); c = c->next) {
49: int chunk_finished = 0;
50:
51: switch(c->type) {
52: case MEM_CHUNK: {
53: char * offset;
54: off_t toSend;
55: ssize_t r;
56:
57: size_t num_chunks, i;
58: struct iovec *chunks;
59: chunk *tc;
60: size_t num_bytes = 0;
61:
62: /* build writev list
63: *
64: * 1. limit: num_chunks < MAX_CHUNKS
65: * 2. limit: num_bytes < max_bytes
66: */
67: for (num_chunks = 0, tc = c; tc && tc->type == MEM_CHUNK && num_chunks < MAX_CHUNKS; num_chunks++, tc = tc->next);
68:
69: chunks = calloc(num_chunks, sizeof(*chunks));
70:
71: for(tc = c, i = 0; i < num_chunks; tc = tc->next, i++) {
72: if (tc->mem->used == 0) {
73: chunks[i].iov_base = tc->mem->ptr;
74: chunks[i].iov_len = 0;
75: } else {
76: offset = tc->mem->ptr + tc->offset;
77: toSend = tc->mem->used - 1 - tc->offset;
78:
79: chunks[i].iov_base = offset;
80:
81: /* protect the return value of writev() */
82: if (toSend > max_bytes ||
83: (off_t) num_bytes + toSend > max_bytes) {
84: chunks[i].iov_len = max_bytes - num_bytes;
85:
86: num_chunks = i + 1;
87: break;
88: } else {
89: chunks[i].iov_len = toSend;
90: }
91:
92: num_bytes += toSend;
93: }
94: }
95:
96: if ((r = writev(fd, chunks, num_chunks)) < 0) {
97: switch (errno) {
98: case EAGAIN:
99: case EINTR:
100: r = 0;
101: break;
102: case EPIPE:
103: case ECONNRESET:
104: free(chunks);
105: return -2;
106: default:
107: log_error_write(srv, __FILE__, __LINE__, "ssd",
108: "writev failed:", strerror(errno), fd);
109:
110: free(chunks);
111: return -1;
112: }
113: }
114:
115: cq->bytes_out += r;
116: max_bytes -= r;
117:
118: /* check which chunks have been written */
119:
120: for(i = 0, tc = c; i < num_chunks; i++, tc = tc->next) {
121: if (r >= (ssize_t)chunks[i].iov_len) {
122: /* written */
123: r -= chunks[i].iov_len;
124: tc->offset += chunks[i].iov_len;
125:
126: if (chunk_finished) {
127: /* skip the chunks from further touches */
128: c = c->next;
129: } else {
130: /* chunks_written + c = c->next is done in the for()*/
131: chunk_finished = 1;
132: }
133: } else {
134: /* partially written */
135:
136: tc->offset += r;
137: chunk_finished = 0;
138:
139: break;
140: }
141: }
142: free(chunks);
143:
144: break;
145: }
146: case FILE_CHUNK: {
147: ssize_t r;
148: off_t abs_offset;
149: off_t toSend;
150: stat_cache_entry *sce = NULL;
151:
152: #define KByte * 1024
153: #define MByte * 1024 KByte
154: #define GByte * 1024 MByte
155: const off_t we_want_to_mmap = 512 KByte;
156: char *start = NULL;
157:
158: if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) {
159: log_error_write(srv, __FILE__, __LINE__, "sb",
160: strerror(errno), c->file.name);
161: return -1;
162: }
163:
164: abs_offset = c->file.start + c->offset;
165:
166: if (abs_offset > sce->st.st_size) {
167: log_error_write(srv, __FILE__, __LINE__, "sb",
168: "file was shrinked:", c->file.name);
169:
170: return -1;
171: }
172:
173: /* mmap the buffer
174: * - first mmap
175: * - new mmap as the we are at the end of the last one */
176: if (c->file.mmap.start == MAP_FAILED ||
177: abs_offset == (off_t)(c->file.mmap.offset + c->file.mmap.length)) {
178:
179: /* Optimizations for the future:
180: *
181: * adaptive mem-mapping
182: * the problem:
183: * we mmap() the whole file. If someone has alot large files and 32bit
184: * machine the virtual address area will be unrun and we will have a failing
185: * mmap() call.
186: * solution:
187: * only mmap 16M in one chunk and move the window as soon as we have finished
188: * the first 8M
189: *
190: * read-ahead buffering
191: * the problem:
192: * sending out several large files in parallel trashes the read-ahead of the
193: * kernel leading to long wait-for-seek times.
194: * solutions: (increasing complexity)
195: * 1. use madvise
196: * 2. use a internal read-ahead buffer in the chunk-structure
197: * 3. use non-blocking IO for file-transfers
198: * */
199:
200: /* all mmap()ed areas are 512kb expect the last which might be smaller */
201: off_t we_want_to_send;
202: size_t to_mmap;
203:
204: /* this is a remap, move the mmap-offset */
205: if (c->file.mmap.start != MAP_FAILED) {
206: munmap(c->file.mmap.start, c->file.mmap.length);
207: c->file.mmap.offset += we_want_to_mmap;
208: } else {
209: /* in case the range-offset is after the first mmap()ed area we skip the area */
210: c->file.mmap.offset = 0;
211:
212: while (c->file.mmap.offset + we_want_to_mmap < c->file.start) {
213: c->file.mmap.offset += we_want_to_mmap;
214: }
215: }
216:
217: /* length is rel, c->offset too, assume there is no limit at the mmap-boundaries */
218: we_want_to_send = c->file.length - c->offset;
219: to_mmap = (c->file.start + c->file.length) - c->file.mmap.offset;
220:
221: /* we have more to send than we can mmap() at once */
222: if (abs_offset + we_want_to_send > c->file.mmap.offset + we_want_to_mmap) {
223: we_want_to_send = (c->file.mmap.offset + we_want_to_mmap) - abs_offset;
224: to_mmap = we_want_to_mmap;
225: }
226:
227: if (-1 == c->file.fd) { /* open the file if not already open */
228: if (-1 == (c->file.fd = open(c->file.name->ptr, O_RDONLY))) {
229: log_error_write(srv, __FILE__, __LINE__, "sbs", "open failed for:", c->file.name, strerror(errno));
230:
231: return -1;
232: }
1.1.1.2 ! misho 233: fd_close_on_exec(c->file.fd);
1.1 misho 234: }
235:
236: if (MAP_FAILED == (c->file.mmap.start = mmap(NULL, to_mmap, PROT_READ, MAP_SHARED, c->file.fd, c->file.mmap.offset))) {
237: log_error_write(srv, __FILE__, __LINE__, "ssbd", "mmap failed:",
238: strerror(errno), c->file.name, c->file.fd);
239:
240: return -1;
241: }
242:
243: c->file.mmap.length = to_mmap;
244: #ifdef LOCAL_BUFFERING
245: buffer_copy_string_len(c->mem, c->file.mmap.start, c->file.mmap.length);
246: #else
247: #ifdef HAVE_MADVISE
248: /* don't advise files < 64Kb */
249: if (c->file.mmap.length > (64 KByte)) {
250: /* darwin 7 is returning EINVAL all the time and I don't know how to
251: * detect this at runtime.i
252: *
253: * ignore the return value for now */
254: madvise(c->file.mmap.start, c->file.mmap.length, MADV_WILLNEED);
255: }
256: #endif
257: #endif
258:
259: /* chunk_reset() or chunk_free() will cleanup for us */
260: }
261:
262: /* to_send = abs_mmap_end - abs_offset */
263: toSend = (c->file.mmap.offset + c->file.mmap.length) - (abs_offset);
264:
265: if (toSend < 0) {
266: log_error_write(srv, __FILE__, __LINE__, "soooo",
267: "toSend is negative:",
268: toSend,
269: c->file.mmap.length,
270: abs_offset,
271: c->file.mmap.offset);
1.1.1.2 ! misho 272: force_assert(toSend < 0);
1.1 misho 273: }
274:
275: if (toSend > max_bytes) toSend = max_bytes;
276:
277: #ifdef LOCAL_BUFFERING
278: start = c->mem->ptr;
279: #else
280: start = c->file.mmap.start;
281: #endif
282:
283: if ((r = write(fd, start + (abs_offset - c->file.mmap.offset), toSend)) < 0) {
284: switch (errno) {
285: case EAGAIN:
286: case EINTR:
287: r = 0;
288: break;
289: case EPIPE:
290: case ECONNRESET:
291: return -2;
292: default:
293: log_error_write(srv, __FILE__, __LINE__, "ssd",
294: "write failed:", strerror(errno), fd);
295:
296: return -1;
297: }
298: }
299:
300: c->offset += r;
301: cq->bytes_out += r;
302: max_bytes -= r;
303:
304: if (c->offset == c->file.length) {
305: chunk_finished = 1;
306:
307: /* we don't need the mmaping anymore */
308: if (c->file.mmap.start != MAP_FAILED) {
309: munmap(c->file.mmap.start, c->file.mmap.length);
310: c->file.mmap.start = MAP_FAILED;
311: }
312: }
313:
314: break;
315: }
316: default:
317:
318: log_error_write(srv, __FILE__, __LINE__, "ds", c, "type not known");
319:
320: return -1;
321: }
322:
323: if (!chunk_finished) {
324: /* not finished yet */
325:
326: break;
327: }
328: }
329:
330: return 0;
331: }
332:
333: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>