version 1.1.1.1, 2013/10/14 10:32:47
|
version 1.1.1.3, 2016/11/02 10:35:00
|
Line 1
|
Line 1
|
|
#include "first.h" |
|
|
#include "network_backends.h" |
#include "network_backends.h" |
|
|
#ifdef USE_WRITEV | #if defined(USE_WRITEV) |
|
|
#include "network.h" |
#include "network.h" |
#include "fdevent.h" |
|
#include "log.h" |
#include "log.h" |
#include "stat_cache.h" |
|
|
|
#include <sys/types.h> | #if defined(HAVE_SYS_UIO_H) |
#include <sys/socket.h> | # include <sys/uio.h> |
#include <sys/uio.h> | #endif |
#include <sys/stat.h> | |
#include <sys/time.h> | |
#include <sys/resource.h> | |
#include <netinet/in.h> | |
#include <netinet/tcp.h> | |
|
|
#include <errno.h> |
#include <errno.h> |
#include <fcntl.h> |
|
#include <unistd.h> |
|
#include <netdb.h> |
|
#include <string.h> |
#include <string.h> |
#include <stdlib.h> |
#include <stdlib.h> |
#include <limits.h> |
|
#include <stdio.h> |
|
#include <assert.h> |
|
|
|
#if 0 |
|
#define LOCAL_BUFFERING 1 |
|
#endif |
|
|
|
#if defined(UIO_MAXIOV) |
#if defined(UIO_MAXIOV) |
# define MAX_CHUNKS UIO_MAXIOV | # define SYS_MAX_CHUNKS UIO_MAXIOV |
#elif defined(IOV_MAX) |
#elif defined(IOV_MAX) |
/* new name for UIO_MAXIOV since IEEE Std 1003.1-2001 */ |
/* new name for UIO_MAXIOV since IEEE Std 1003.1-2001 */ |
# define MAX_CHUNKS IOV_MAX | # define SYS_MAX_CHUNKS IOV_MAX |
#elif defined(_XOPEN_IOV_MAX) |
#elif defined(_XOPEN_IOV_MAX) |
/* minimum value for sysconf(_SC_IOV_MAX); posix requires this to be at least 16, which is good enough - no need to call sysconf() */ |
/* minimum value for sysconf(_SC_IOV_MAX); posix requires this to be at least 16, which is good enough - no need to call sysconf() */ |
# define MAX_CHUNKS _XOPEN_IOV_MAX | # define SYS_MAX_CHUNKS _XOPEN_IOV_MAX |
#else |
#else |
# error neither UIO_MAXIOV nor IOV_MAX nor _XOPEN_IOV_MAX are defined |
# error neither UIO_MAXIOV nor IOV_MAX nor _XOPEN_IOV_MAX are defined |
#endif |
#endif |
|
|
int network_write_chunkqueue_writev(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) { | /* allocate iovec[MAX_CHUNKS] on stack, so pick a sane limit: |
chunk *c; | * - each entry will use 1 pointer + 1 size_t |
| * - 32 chunks -> 256 / 512 bytes (32-bit/64-bit pointers) |
for(c = cq->first; (max_bytes > 0) && (NULL != c); c = c->next) { | */ |
int chunk_finished = 0; | #define STACK_MAX_ALLOC_CHUNKS 32 |
| #if SYS_MAX_CHUNKS > STACK_MAX_ALLOC_CHUNKS |
switch(c->type) { | # define MAX_CHUNKS STACK_MAX_ALLOC_CHUNKS |
case MEM_CHUNK: { | |
char * offset; | |
off_t toSend; | |
ssize_t r; | |
| |
size_t num_chunks, i; | |
struct iovec *chunks; | |
chunk *tc; | |
size_t num_bytes = 0; | |
| |
/* build writev list | |
* | |
* 1. limit: num_chunks < MAX_CHUNKS | |
* 2. limit: num_bytes < max_bytes | |
*/ | |
for (num_chunks = 0, tc = c; tc && tc->type == MEM_CHUNK && num_chunks < MAX_CHUNKS; num_chunks++, tc = tc->next); | |
| |
chunks = calloc(num_chunks, sizeof(*chunks)); | |
| |
for(tc = c, i = 0; i < num_chunks; tc = tc->next, i++) { | |
if (tc->mem->used == 0) { | |
chunks[i].iov_base = tc->mem->ptr; | |
chunks[i].iov_len = 0; | |
} else { | |
offset = tc->mem->ptr + tc->offset; | |
toSend = tc->mem->used - 1 - tc->offset; | |
| |
chunks[i].iov_base = offset; | |
| |
/* protect the return value of writev() */ | |
if (toSend > max_bytes || | |
(off_t) num_bytes + toSend > max_bytes) { | |
chunks[i].iov_len = max_bytes - num_bytes; | |
| |
num_chunks = i + 1; | |
break; | |
} else { | |
chunks[i].iov_len = toSend; | |
} | |
| |
num_bytes += toSend; | |
} | |
} | |
| |
if ((r = writev(fd, chunks, num_chunks)) < 0) { | |
switch (errno) { | |
case EAGAIN: | |
case EINTR: | |
r = 0; | |
break; | |
case EPIPE: | |
case ECONNRESET: | |
free(chunks); | |
return -2; | |
default: | |
log_error_write(srv, __FILE__, __LINE__, "ssd", | |
"writev failed:", strerror(errno), fd); | |
| |
free(chunks); | |
return -1; | |
} | |
} | |
| |
cq->bytes_out += r; | |
max_bytes -= r; | |
| |
/* check which chunks have been written */ | |
| |
for(i = 0, tc = c; i < num_chunks; i++, tc = tc->next) { | |
if (r >= (ssize_t)chunks[i].iov_len) { | |
/* written */ | |
r -= chunks[i].iov_len; | |
tc->offset += chunks[i].iov_len; | |
| |
if (chunk_finished) { | |
/* skip the chunks from further touches */ | |
c = c->next; | |
} else { | |
/* chunks_written + c = c->next is done in the for()*/ | |
chunk_finished = 1; | |
} | |
} else { | |
/* partially written */ | |
| |
tc->offset += r; | |
chunk_finished = 0; | |
| |
break; | |
} | |
} | |
free(chunks); | |
| |
break; | |
} | |
case FILE_CHUNK: { | |
ssize_t r; | |
off_t abs_offset; | |
off_t toSend; | |
stat_cache_entry *sce = NULL; | |
| |
#define KByte * 1024 | |
#define MByte * 1024 KByte | |
#define GByte * 1024 MByte | |
const off_t we_want_to_mmap = 512 KByte; | |
char *start = NULL; | |
| |
if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) { | |
log_error_write(srv, __FILE__, __LINE__, "sb", | |
strerror(errno), c->file.name); | |
return -1; | |
} | |
| |
abs_offset = c->file.start + c->offset; | |
| |
if (abs_offset > sce->st.st_size) { | |
log_error_write(srv, __FILE__, __LINE__, "sb", | |
"file was shrinked:", c->file.name); | |
| |
return -1; | |
} | |
| |
/* mmap the buffer | |
* - first mmap | |
* - new mmap as the we are at the end of the last one */ | |
if (c->file.mmap.start == MAP_FAILED || | |
abs_offset == (off_t)(c->file.mmap.offset + c->file.mmap.length)) { | |
| |
/* Optimizations for the future: | |
* | |
* adaptive mem-mapping | |
* the problem: | |
* we mmap() the whole file. If someone has alot large files and 32bit | |
* machine the virtual address area will be unrun and we will have a failing | |
* mmap() call. | |
* solution: | |
* only mmap 16M in one chunk and move the window as soon as we have finished | |
* the first 8M | |
* | |
* read-ahead buffering | |
* the problem: | |
* sending out several large files in parallel trashes the read-ahead of the | |
* kernel leading to long wait-for-seek times. | |
* solutions: (increasing complexity) | |
* 1. use madvise | |
* 2. use a internal read-ahead buffer in the chunk-structure | |
* 3. use non-blocking IO for file-transfers | |
* */ | |
| |
/* all mmap()ed areas are 512kb expect the last which might be smaller */ | |
off_t we_want_to_send; | |
size_t to_mmap; | |
| |
/* this is a remap, move the mmap-offset */ | |
if (c->file.mmap.start != MAP_FAILED) { | |
munmap(c->file.mmap.start, c->file.mmap.length); | |
c->file.mmap.offset += we_want_to_mmap; | |
} else { | |
/* in case the range-offset is after the first mmap()ed area we skip the area */ | |
c->file.mmap.offset = 0; | |
| |
while (c->file.mmap.offset + we_want_to_mmap < c->file.start) { | |
c->file.mmap.offset += we_want_to_mmap; | |
} | |
} | |
| |
/* length is rel, c->offset too, assume there is no limit at the mmap-boundaries */ | |
we_want_to_send = c->file.length - c->offset; | |
to_mmap = (c->file.start + c->file.length) - c->file.mmap.offset; | |
| |
/* we have more to send than we can mmap() at once */ | |
if (abs_offset + we_want_to_send > c->file.mmap.offset + we_want_to_mmap) { | |
we_want_to_send = (c->file.mmap.offset + we_want_to_mmap) - abs_offset; | |
to_mmap = we_want_to_mmap; | |
} | |
| |
if (-1 == c->file.fd) { /* open the file if not already open */ | |
if (-1 == (c->file.fd = open(c->file.name->ptr, O_RDONLY))) { | |
log_error_write(srv, __FILE__, __LINE__, "sbs", "open failed for:", c->file.name, strerror(errno)); | |
| |
return -1; | |
} | |
#ifdef FD_CLOEXEC | |
fcntl(c->file.fd, F_SETFD, FD_CLOEXEC); | |
#endif | |
} | |
| |
if (MAP_FAILED == (c->file.mmap.start = mmap(NULL, to_mmap, PROT_READ, MAP_SHARED, c->file.fd, c->file.mmap.offset))) { | |
/* close it here, otherwise we'd have to set FD_CLOEXEC */ | |
| |
log_error_write(srv, __FILE__, __LINE__, "ssbd", "mmap failed:", | |
strerror(errno), c->file.name, c->file.fd); | |
| |
return -1; | |
} | |
| |
c->file.mmap.length = to_mmap; | |
#ifdef LOCAL_BUFFERING | |
buffer_copy_string_len(c->mem, c->file.mmap.start, c->file.mmap.length); | |
#else |
#else |
#ifdef HAVE_MADVISE | # define MAX_CHUNKS SYS_MAX_CHUNKS |
/* don't advise files < 64Kb */ | |
if (c->file.mmap.length > (64 KByte)) { | |
/* darwin 7 is returning EINVAL all the time and I don't know how to | |
* detect this at runtime.i | |
* | |
* ignore the return value for now */ | |
madvise(c->file.mmap.start, c->file.mmap.length, MADV_WILLNEED); | |
} | |
#endif |
#endif |
#endif |
|
|
|
/* chunk_reset() or chunk_free() will cleanup for us */ | int network_writev_mem_chunks(server *srv, connection *con, int fd, chunkqueue *cq, off_t *p_max_bytes) { |
} | struct iovec chunks[MAX_CHUNKS]; |
| size_t num_chunks; |
| off_t max_bytes = *p_max_bytes; |
| off_t toSend; |
| ssize_t r; |
| UNUSED(con); |
|
|
/* to_send = abs_mmap_end - abs_offset */ | force_assert(NULL != cq->first); |
toSend = (c->file.mmap.offset + c->file.mmap.length) - (abs_offset); | force_assert(MEM_CHUNK == cq->first->type); |
|
|
if (toSend < 0) { | { |
log_error_write(srv, __FILE__, __LINE__, "soooo", | chunk const *c; |
"toSend is negative:", | |
toSend, | |
c->file.mmap.length, | |
abs_offset, | |
c->file.mmap.offset); | |
assert(toSend < 0); | |
} | |
|
|
if (toSend > max_bytes) toSend = max_bytes; | toSend = 0; |
| num_chunks = 0; |
| for (c = cq->first; NULL != c && MEM_CHUNK == c->type && num_chunks < MAX_CHUNKS && toSend < max_bytes; c = c->next) { |
| size_t c_len; |
|
|
#ifdef LOCAL_BUFFERING | force_assert(c->offset >= 0 && c->offset <= (off_t)buffer_string_length(c->mem)); |
start = c->mem->ptr; | c_len = buffer_string_length(c->mem) - c->offset; |
#else | if (c_len > 0) { |
start = c->file.mmap.start; | toSend += c_len; |
#endif | |
|
|
if ((r = write(fd, start + (abs_offset - c->file.mmap.offset), toSend)) < 0) { | chunks[num_chunks].iov_base = c->mem->ptr + c->offset; |
switch (errno) { | chunks[num_chunks].iov_len = c_len; |
case EAGAIN: | |
case EINTR: | |
r = 0; | |
break; | |
case EPIPE: | |
case ECONNRESET: | |
return -2; | |
default: | |
log_error_write(srv, __FILE__, __LINE__, "ssd", | |
"write failed:", strerror(errno), fd); | |
|
|
return -1; | ++num_chunks; |
} | |
} |
} |
|
} |
|
} |
|
|
c->offset += r; | if (0 == num_chunks) { |
cq->bytes_out += r; | chunkqueue_remove_finished_chunks(cq); |
max_bytes -= r; | return 0; |
| } |
|
|
if (c->offset == c->file.length) { | r = writev(fd, chunks, num_chunks); |
chunk_finished = 1; | |
|
|
/* we don't need the mmaping anymore */ | if (r < 0) switch (errno) { |
if (c->file.mmap.start != MAP_FAILED) { | case EAGAIN: |
munmap(c->file.mmap.start, c->file.mmap.length); | case EINTR: |
c->file.mmap.start = MAP_FAILED; | break; |
} | case EPIPE: |
} | case ECONNRESET: |
| return -2; |
| default: |
| log_error_write(srv, __FILE__, __LINE__, "ssd", |
| "writev failed:", strerror(errno), fd); |
| return -1; |
| } |
|
|
break; | if (r >= 0) { |
} | *p_max_bytes -= r; |
default: | chunkqueue_mark_written(cq, r); |
| } |
|
|
log_error_write(srv, __FILE__, __LINE__, "ds", c, "type not known"); | return (r > 0 && r == toSend) ? 0 : -3; |
| } |
|
|
return -1; | #endif /* USE_WRITEV */ |
} | |
|
|
if (!chunk_finished) { | int network_write_chunkqueue_writev(server *srv, connection *con, int fd, chunkqueue *cq, off_t max_bytes) { |
/* not finished yet */ | while (max_bytes > 0 && NULL != cq->first) { |
| int r = -1; |
|
|
|
switch (cq->first->type) { |
|
case MEM_CHUNK: |
|
r = network_writev_mem_chunks(srv, con, fd, cq, &max_bytes); |
break; |
break; |
|
case FILE_CHUNK: |
|
r = network_write_file_chunk_mmap(srv, con, fd, cq, &max_bytes); |
|
break; |
} |
} |
|
|
|
if (-3 == r) return 0; |
|
if (0 != r) return r; |
} |
} |
|
|
return 0; |
return 0; |
} |
} |
|
|
#endif |
|