Annotation of embedaddon/ntp/lib/isc/win32/socket.c, revision 1.1.1.1
1.1 misho 1: /*
2: * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
3: * Copyright (C) 2000-2003 Internet Software Consortium.
4: *
5: * Permission to use, copy, modify, and/or distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10: * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11: * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12: * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13: * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14: * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15: * PERFORMANCE OF THIS SOFTWARE.
16: */
17:
18: /* $Id: socket.c,v 1.70.54.4 2009/01/29 22:40:36 jinmei Exp $ */
19:
20: /* This code uses functions which are only available on Server 2003 and
21: * higher, and Windows XP and higher.
22: *
23: * This code is by nature multithreaded and takes advantage of various
24: * features to pass on information through the completion port for
25: * when I/O is completed. All sends, receives, accepts, and connects are
26: * completed through the completion port.
27: *
28: * The number of Completion Port Worker threads used is the total number
29: * of CPU's + 1. This increases the likelihood that a Worker Thread is
30: * available for processing a completed request.
31: *
32: * XXXPDM 5 August, 2002
33: */
34:
35: #define MAKE_EXTERNAL 1
36: #include <config.h>
37:
38: #include <sys/types.h>
39:
40: #ifndef _WINSOCKAPI_
41: #define _WINSOCKAPI_ /* Prevent inclusion of winsock.h in windows.h */
42: #endif
43:
44: #include <errno.h>
45: #include <stddef.h>
46: #include <stdlib.h>
47: #include <string.h>
48: #include <unistd.h>
49: #include <io.h>
50: #include <fcntl.h>
51: #include <process.h>
52:
53: #include <isc/buffer.h>
54: #include <isc/bufferlist.h>
55: #include <isc/condition.h>
56: #include <isc/list.h>
57: #include <isc/log.h>
58: #include <isc/mem.h>
59: #include <isc/msgs.h>
60: #include <isc/mutex.h>
61: #include <isc/net.h>
62: #include <isc/once.h>
63: #include <isc/os.h>
64: #include <isc/platform.h>
65: #include <isc/print.h>
66: #include <isc/region.h>
67: #include <isc/socket.h>
68: #include <isc/stats.h>
69: #include <isc/strerror.h>
70: #include <isc/syslog.h>
71: #include <isc/task.h>
72: #include <isc/thread.h>
73: #include <isc/util.h>
74: #include <isc/win32os.h>
75:
76: #include <mswsock.h>
77:
78: #include "errno2result.h"
79:
80: /*
81: * How in the world can Microsoft exist with APIs like this?
82: * We can't actually call this directly, because it turns out
83: * no library exports this function. Instead, we need to
84: * issue a runtime call to get the address.
85: */
86: LPFN_CONNECTEX ISCConnectEx;
87: LPFN_ACCEPTEX ISCAcceptEx;
88: LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs;
89:
90: /*
91: * Run expensive internal consistency checks.
92: */
93: #ifdef ISC_SOCKET_CONSISTENCY_CHECKS
94: #define CONSISTENT(sock) consistent(sock)
95: #else
96: #define CONSISTENT(sock) do {} while (0)
97: #endif
98: static void consistent(isc_socket_t *sock);
99:
/*
 * I/O control code used to control how connection resets are reported
 * on UDP sockets; see Microsoft Knowledge Base article Q263823.
 * NOTE: Windows 2000 systems need Service Pack 2 or later for this.
 * The definition is supplied here only when the SDK headers lack it.
 */
#if !defined(SIO_UDP_CONNRESET)
#define SIO_UDP_CONNRESET	_WSAIOW(IOC_VENDOR,12)
#endif
110:
/*
 * The type of the socket-address length argument differs between
 * systems (int, size_t, socklen_t).  It is named here so that it can be
 * overridden from the build configuration if necessary.
 */
#if !defined(ISC_SOCKADDR_LEN_T)
#define ISC_SOCKADDR_LEN_T	unsigned int
#endif
118:
/*
 * "Soft" errors: non-fatal results of network functions such as recv().
 * A code of zero is also treated as soft.
 */
#define SOFT_ERROR(e)	((e) == WSAEINTR || \
			 (e) == WSAEWOULDBLOCK || \
			 (e) == EWOULDBLOCK || \
			 (e) == EINTR || \
			 (e) == EAGAIN || \
			 (e) == 0)
129:
/*
 * "Pending" results are not errors at all: the overlapped operation was
 * started (WSA_IO_PENDING) or there is no error code (0).  They are kept
 * separate from the soft errors.
 */
#define PENDING_ERROR(e)	((e) == WSA_IO_PENDING || (e) == 0)
135:
/* Status codes describing the outcome of an I/O attempt. */
#define DOIO_SUCCESS	0	/* i/o ok, event sent */
#define DOIO_SOFT	1	/* i/o ok, soft error, no event sent */
#define DOIO_HARD	2	/* i/o error, event sent */
#define DOIO_EOF	3	/* EOF, no event sent */
#define DOIO_PENDING	4	/* status when i/o is in process */
#define DOIO_NEEDMORE	5	/* i/o was processed, but more is needed
				   because of the minimum */
142:
/*
 * DLVL(x) expands to the category, module and debug-level arguments
 * that the logging helpers in this file take.
 */
#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)

/*
 * Debug levels used by this module:
 *
 *	90 -- Function entry/exit and other tracing.
 *	70 -- Socket "correctness" -- including returning of events, etc.
 *	60 -- Socket data send/receive.
 *	50 -- Event tracing, including receiving/sending completion events.
 *	20 -- Socket creation/destruction.
 */
#define TRACE_LEVEL		90
#define CORRECTNESS_LEVEL	70
#define IOEVENT_LEVEL		60
#define EVENT_LEVEL		50
#define CREATION_LEVEL		20

/* Ready-made argument triples for each of the levels above. */
#define TRACE		DLVL(TRACE_LEVEL)
#define CORRECTNESS	DLVL(CORRECTNESS_LEVEL)
#define IOEVENT		DLVL(IOEVENT_LEVEL)
#define EVENT		DLVL(EVENT_LEVEL)
#define CREATION	DLVL(CREATION_LEVEL)
163:
164: typedef isc_event_t intev_t;
165:
/*
 * Socket state, stored in isc_socket.state.
 */
enum {
	SOCK_INITIALIZED,	/* socket structure initialized */
	SOCK_OPEN,		/* opened, nothing to do yet */
	SOCK_DATA,		/* sending or receiving data */
	SOCK_LISTEN,		/* TCP socket listening for connects */
	SOCK_ACCEPT,		/* TCP socket waiting to accept */
	SOCK_CONNECT,		/* TCP socket connecting */
	SOCK_CLOSED,		/* socket has been closed */
};
178:
/* Magic number stamped into isc_socket.magic, and the matching validity test. */
#define SOCKET_MAGIC		ISC_MAGIC('I', 'O', 'i', 'o')
#define VALID_SOCKET(t)		ISC_MAGIC_VALID(t, SOCKET_MAGIC)
181:
/*
 * IPv6 control information.  On an IPv6 socket we would like to collect
 * the destination address and interface so that the client can set them
 * on outgoing packets; USE_CMSG requests that when IPv6 is available.
 */
#if defined(ISC_PLATFORM_HAVEIPV6) && !defined(USE_CMSG)
#define USE_CMSG 1
#endif

/*
 * ... but we really do not want to use these control messages here:
 * Win32 does not have the mechanism before XP.  USE_CMSG is therefore
 * unconditionally switched off again.
 */
#undef USE_CMSG
198:
199: /*
200: * Message header for recvmsg and sendmsg calls.
201: * Used value-result for recvmsg, value only for sendmsg.
202: */
203: struct msghdr {
204: SOCKADDR_STORAGE to_addr; /* UDP send/recv address */
205: int to_addr_len; /* length of the address */
206: WSABUF *msg_iov; /* scatter/gather array */
207: u_int msg_iovlen; /* # elements in msg_iov */
208: void *msg_control; /* ancillary data, see below */
209: u_int msg_controllen; /* ancillary data buffer len */
210: int msg_totallen; /* total length of this message */
211: } msghdr;
212:
/*
 * Size (in bytes) to raise the receive buffer to.
 */
#define RCVBUFSIZE	(32*1024)

/*
 * How many times a send operation is repeated when the result is
 * WSAEINTR.
 */
#define NRETRIES	10
223:
224: struct isc_socket {
225: /* Not locked. */
226: unsigned int magic;
227: isc_socketmgr_t *manager;
228: isc_mutex_t lock;
229: isc_sockettype_t type;
230:
231: /* Pointers to scatter/gather buffers */
232: WSABUF iov[ISC_SOCKET_MAXSCATTERGATHER];
233:
234: /* Locked by socket lock. */
235: ISC_LINK(isc_socket_t) link;
236: unsigned int references; /* EXTERNAL references */
237: SOCKET fd; /* file handle */
238: int pf; /* protocol family */
239: char name[16];
240: void * tag;
241:
242: /*
243: * Each recv() call uses this buffer. It is a per-socket receive
244: * buffer that allows us to decouple the system recv() from the
245: * recv_list done events. This means the items on the recv_list
246: * can be removed without having to cancel pending system recv()
247: * calls. It also allows us to read-ahead in some cases.
248: */
249: struct {
250: SOCKADDR_STORAGE from_addr; // UDP send/recv address
251: int from_addr_len; // length of the address
252: char *base; // the base of the buffer
253: char *consume_position; // where to start copying data from next
254: unsigned int len; // the actual size of this buffer
255: unsigned int remaining; // the number of bytes remaining
256: } recvbuf;
257:
258: ISC_LIST(isc_socketevent_t) send_list;
259: ISC_LIST(isc_socketevent_t) recv_list;
260: ISC_LIST(isc_socket_newconnev_t) accept_list;
261: isc_socket_connev_t *connect_ev;
262:
263: isc_sockaddr_t address; /* remote address */
264:
265: unsigned int listener : 1, /* listener socket */
266: connected : 1,
267: pending_connect : 1, /* connect pending */
268: bound : 1; /* bound to local addr */
269: unsigned int pending_iocp; /* Should equal the counters below. Debug. */
270: unsigned int pending_recv; /* Number of outstanding recv() calls. */
271: unsigned int pending_send; /* Number of outstanding send() calls. */
272: unsigned int pending_accept; /* Number of outstanding accept() calls. */
273: unsigned int state; /* Socket state. Debugging and consistency checking. */
274: int state_lineno; /* line which last touched state */
275: };
276:
/*
 * Store a new state in 'sock' and remember the source line that did it.
 */
#define _set_state(sock, _state) \
	do { \
		(sock)->state = (_state); \
		(sock)->state_lineno = __LINE__; \
	} while (0)
278:
279: /*
280: * Buffer structure
281: */
282: typedef struct buflist buflist_t;
283:
284: struct buflist {
285: void *buf;
286: unsigned int buflen;
287: ISC_LINK(buflist_t) link;
288: };
289:
290: /*
291: * I/O Completion ports Info structures
292: */
293:
294: static HANDLE hHeapHandle = NULL;
295: typedef struct IoCompletionInfo {
296: OVERLAPPED overlapped;
297: isc_socketevent_t *dev; /* send()/recv() done event */
298: isc_socket_connev_t *cdev; /* connect() done event */
299: isc_socket_newconnev_t *adev; /* accept() done event */
300: void *acceptbuffer;
301: DWORD received_bytes;
302: int request_type;
303: struct msghdr messagehdr;
304: ISC_LIST(buflist_t) bufferlist; /*%< list of buffers */
305: } IoCompletionInfo;
306:
/*
 * Upper bound on the number of I/O completion port worker threads.
 * The number actually used is the number of CPUs + 1, capped at this
 * value.
 */
#define MAX_IOCPTHREADS		20

/* Magic number stamped into isc_socketmgr.magic, and its validity test. */
#define SOCKET_MANAGER_MAGIC	ISC_MAGIC('I', 'O', 'm', 'g')
#define VALID_MANAGER(m)	ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
316:
317: struct isc_socketmgr {
318: /* Not locked. */
319: unsigned int magic;
320: isc_mem_t *mctx;
321: isc_mutex_t lock;
322: isc_stats_t *stats;
323:
324: /* Locked by manager lock. */
325: ISC_LIST(isc_socket_t) socklist;
326: isc_boolean_t bShutdown;
327: isc_condition_t shutdown_ok;
328: HANDLE hIoCompletionPort;
329: int maxIOCPThreads;
330: HANDLE hIOCPThreads[MAX_IOCPTHREADS];
331: DWORD dwIOCPThreadIds[MAX_IOCPTHREADS];
332:
333: /*
334: * Debugging.
335: * Modified by InterlockedIncrement() and InterlockedDecrement()
336: */
337: LONG totalSockets;
338: LONG iocp_total;
339: };
340:
/* Kinds of request recorded in IoCompletionInfo.request_type. */
enum {
	SOCKET_RECV,		/* receive */
	SOCKET_SEND,		/* send */
	SOCKET_ACCEPT,		/* accept */
	SOCKET_CONNECT		/* connect */
};
347:
/*
 * Maximum number of iovec entries for send() and recv().
 */
#define MAXSCATTERGATHER_SEND	(ISC_SOCKET_MAXSCATTERGATHER)
#define MAXSCATTERGATHER_RECV	(ISC_SOCKET_MAXSCATTERGATHER)
353:
354: static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext);
355: static void maybe_free_socket(isc_socket_t **, int);
356: static void free_socket(isc_socket_t **, int);
357: static isc_boolean_t senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev);
358: static isc_boolean_t acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev);
359: static isc_boolean_t connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev);
360: static void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev);
361: static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev);
362: static void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev);
363: static void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev);
364: static void send_recvdone_abort(isc_socket_t *sock, isc_result_t result);
365: static void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev);
366: static void queue_receive_request(isc_socket_t *sock);
367:
368: /*
369: * This is used to dump the contents of the sock structure
370: * You should make sure that the sock is locked before
371: * dumping it. Since the code uses simple printf() statements
372: * it should only be used interactively.
373: */
374: void
375: sock_dump(isc_socket_t *sock) {
376: isc_socketevent_t *ldev;
377: isc_socket_newconnev_t *ndev;
378:
379: #if 0
380: isc_sockaddr_t addr;
381: char socktext[256];
382:
383: isc_socket_getpeername(sock, &addr);
384: isc_sockaddr_format(&addr, socktext, sizeof(socktext));
385: printf("Remote Socket: %s\n", socktext);
386: isc_socket_getsockname(sock, &addr);
387: isc_sockaddr_format(&addr, socktext, sizeof(socktext));
388: printf("This Socket: %s\n", socktext);
389: #endif
390:
391: printf("\n\t\tSock Dump\n");
392: printf("\t\tfd: %u\n", sock->fd);
393: printf("\t\treferences: %d\n", sock->references);
394: printf("\t\tpending_accept: %d\n", sock->pending_accept);
395: printf("\t\tconnecting: %d\n", sock->pending_connect);
396: printf("\t\tconnected: %d\n", sock->connected);
397: printf("\t\tbound: %d\n", sock->bound);
398: printf("\t\tpending_iocp: %d\n", sock->pending_iocp);
399: printf("\t\tsocket type: %d\n", sock->type);
400:
401: printf("\n\t\tSock Recv List\n");
402: ldev = ISC_LIST_HEAD(sock->recv_list);
403: while (ldev != NULL) {
404: printf("\t\tdev: %p\n", ldev);
405: ldev = ISC_LIST_NEXT(ldev, ev_link);
406: }
407:
408: printf("\n\t\tSock Send List\n");
409: ldev = ISC_LIST_HEAD(sock->send_list);
410: while (ldev != NULL) {
411: printf("\t\tdev: %p\n", ldev);
412: ldev = ISC_LIST_NEXT(ldev, ev_link);
413: }
414:
415: printf("\n\t\tSock Accept List\n");
416: ndev = ISC_LIST_HEAD(sock->accept_list);
417: while (ndev != NULL) {
418: printf("\t\tdev: %p\n", ldev);
419: ndev = ISC_LIST_NEXT(ndev, ev_link);
420: }
421: }
422:
423: static void
424: socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
425: isc_logcategory_t *category, isc_logmodule_t *module, int level,
426: isc_msgcat_t *msgcat, int msgset, int message,
427: const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10);
428:
429: /* This function will add an entry to the I/O completion port
430: * that will signal the I/O thread to exit (gracefully)
431: */
432: static void
433: signal_iocompletionport_exit(isc_socketmgr_t *manager) {
434: int i;
435: int errval;
436: char strbuf[ISC_STRERRORSIZE];
437:
438: REQUIRE(VALID_MANAGER(manager));
439: for (i = 0; i < manager->maxIOCPThreads; i++) {
440: if (!PostQueuedCompletionStatus(manager->hIoCompletionPort,
441: 0, 0, 0)) {
442: errval = GetLastError();
443: isc__strerror(errval, strbuf, sizeof(strbuf));
444: FATAL_ERROR(__FILE__, __LINE__,
445: isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
446: ISC_MSG_FAILED,
447: "Can't request service thread to exit: %s"),
448: strbuf);
449: }
450: }
451: }
452:
453: /*
454: * Create the worker threads for the I/O Completion Port
455: */
456: void
457: iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) {
458: int errval;
459: char strbuf[ISC_STRERRORSIZE];
460: int i;
461:
462: INSIST(total_threads > 0);
463: REQUIRE(VALID_MANAGER(manager));
464: /*
465: * We need at least one
466: */
467: for (i = 0; i < total_threads; i++) {
468: manager->hIOCPThreads[i] = CreateThread(NULL, 0, SocketIoThread,
469: manager, 0,
470: &manager->dwIOCPThreadIds[i]);
471: if (manager->hIOCPThreads[i] == NULL) {
472: errval = GetLastError();
473: isc__strerror(errval, strbuf, sizeof(strbuf));
474: FATAL_ERROR(__FILE__, __LINE__,
475: isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
476: ISC_MSG_FAILED,
477: "Can't create IOCP thread: %s"),
478: strbuf);
479: exit(1);
480: }
481: }
482: }
483:
484: /*
485: * Create/initialise the I/O completion port
486: */
487: void
488: iocompletionport_init(isc_socketmgr_t *manager) {
489: int errval;
490: char strbuf[ISC_STRERRORSIZE];
491:
492: REQUIRE(VALID_MANAGER(manager));
493: /*
494: * Create a private heap to handle the socket overlapped structure
495: * The minimum number of structures is 10, there is no maximum
496: */
497: hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0);
498: if (hHeapHandle == NULL) {
499: errval = GetLastError();
500: isc__strerror(errval, strbuf, sizeof(strbuf));
501: FATAL_ERROR(__FILE__, __LINE__,
502: isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
503: ISC_MSG_FAILED,
504: "HeapCreate() failed during "
505: "initialization: %s"),
506: strbuf);
507: exit(1);
508: }
509:
510: manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS);
511:
512: /* Now Create the Completion Port */
513: manager->hIoCompletionPort = CreateIoCompletionPort(
514: INVALID_HANDLE_VALUE, NULL,
515: 0, manager->maxIOCPThreads);
516: if (manager->hIoCompletionPort == NULL) {
517: errval = GetLastError();
518: isc__strerror(errval, strbuf, sizeof(strbuf));
519: FATAL_ERROR(__FILE__, __LINE__,
520: isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
521: ISC_MSG_FAILED,
522: "CreateIoCompletionPort() failed "
523: "during initialization: %s"),
524: strbuf);
525: exit(1);
526: }
527:
528: /*
529: * Worker threads for servicing the I/O
530: */
531: iocompletionport_createthreads(manager->maxIOCPThreads, manager);
532: }
533:
534: /*
535: * Associate a socket with an IO Completion Port. This allows us to queue events for it
536: * and have our worker pool of threads process them.
537: */
538: void
539: iocompletionport_update(isc_socket_t *sock) {
540: HANDLE hiocp;
541: char strbuf[ISC_STRERRORSIZE];
542:
543: REQUIRE(VALID_SOCKET(sock));
544:
545: hiocp = CreateIoCompletionPort((HANDLE)sock->fd,
546: sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0);
547:
548: if (hiocp == NULL) {
549: DWORD errval = GetLastError();
550: isc__strerror(errval, strbuf, sizeof(strbuf));
551: isc_log_iwrite(isc_lctx,
552: ISC_LOGCATEGORY_GENERAL,
553: ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
554: isc_msgcat, ISC_MSGSET_SOCKET,
555: ISC_MSG_TOOMANYHANDLES,
556: "iocompletionport_update: failed to open"
557: " io completion port: %s",
558: strbuf);
559:
560: /* XXXMLG temporary hack to make failures detected.
561: * This function should return errors to the caller, not
562: * exit here.
563: */
564: FATAL_ERROR(__FILE__, __LINE__,
565: isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
566: ISC_MSG_FAILED,
567: "CreateIoCompletionPort() failed "
568: "during initialization: %s"),
569: strbuf);
570: exit(1);
571: }
572:
573: InterlockedIncrement(&sock->manager->iocp_total);
574: }
575:
576: /*
577: * Routine to cleanup and then close the socket.
578: * Only close the socket here if it is NOT associated
579: * with an event, otherwise the WSAWaitForMultipleEvents
580: * may fail due to the fact that the Wait should not
581: * be running while closing an event or a socket.
582: * The socket is locked before calling this function
583: */
584: void
585: socket_close(isc_socket_t *sock) {
586:
587: REQUIRE(sock != NULL);
588:
589: if (sock->fd != INVALID_SOCKET) {
590: closesocket(sock->fd);
591: sock->fd = INVALID_SOCKET;
592: _set_state(sock, SOCK_CLOSED);
593: InterlockedDecrement(&sock->manager->totalSockets);
594: }
595: }
596:
597: static isc_once_t initialise_once = ISC_ONCE_INIT;
598: static isc_boolean_t initialised = ISC_FALSE;
599:
600: static void
601: initialise(void) {
602: WORD wVersionRequested;
603: WSADATA wsaData;
604: int err;
605: SOCKET sock;
606: GUID GUIDConnectEx = WSAID_CONNECTEX;
607: GUID GUIDAcceptEx = WSAID_ACCEPTEX;
608: GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS;
609: DWORD dwBytes;
610:
611: /* Need Winsock 2.2 or better */
612: wVersionRequested = MAKEWORD(2, 2);
613:
614: err = WSAStartup(wVersionRequested, &wsaData);
615: if (err != 0) {
616: char strbuf[ISC_STRERRORSIZE];
617: isc__strerror(err, strbuf, sizeof(strbuf));
618: FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s",
619: isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
620: ISC_MSG_FAILED, "failed"),
621: strbuf);
622: exit(1);
623: }
624: /*
625: * The following APIs do not exist as functions in a library, but we must
626: * ask winsock for them. They are "extensions" -- but why they cannot be
627: * actual functions is beyond me. So, ask winsock for the pointers to the
628: * functions we need.
629: */
630: sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
631: INSIST(sock != INVALID_SOCKET);
632: err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
633: &GUIDConnectEx, sizeof(GUIDConnectEx),
634: &ISCConnectEx, sizeof(ISCConnectEx),
635: &dwBytes, NULL, NULL);
636: INSIST(err == 0);
637:
638: err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
639: &GUIDAcceptEx, sizeof(GUIDAcceptEx),
640: &ISCAcceptEx, sizeof(ISCAcceptEx),
641: &dwBytes, NULL, NULL);
642: INSIST(err == 0);
643:
644: err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
645: &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs),
646: &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs),
647: &dwBytes, NULL, NULL);
648: INSIST(err == 0);
649:
650: closesocket(sock);
651:
652: initialised = ISC_TRUE;
653: }
654:
655: /*
656: * Initialize socket services
657: */
658: void
659: InitSockets(void) {
660: RUNTIME_CHECK(isc_once_do(&initialise_once,
661: initialise) == ISC_R_SUCCESS);
662: if (!initialised)
663: exit(1);
664: }
665:
666: int
667: internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo,
668: struct msghdr *messagehdr, int flags, int *Error)
669: {
670: int Result;
671: DWORD BytesSent;
672: DWORD Flags = flags;
673: int total_sent;
674:
675: *Error = 0;
676: Result = WSASendTo(sock->fd, messagehdr->msg_iov,
677: messagehdr->msg_iovlen, &BytesSent,
678: Flags, (SOCKADDR *)&messagehdr->to_addr,
679: messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo,
680: NULL);
681:
682: total_sent = (int)BytesSent;
683:
684: /* Check for errors.*/
685: if (Result == SOCKET_ERROR) {
686: *Error = WSAGetLastError();
687:
688: switch (*Error) {
689: case WSA_IO_INCOMPLETE:
690: case WSA_WAIT_IO_COMPLETION:
691: case WSA_IO_PENDING:
692: case NO_ERROR: /* Strange, but okay */
693: sock->pending_iocp++;
694: sock->pending_send++;
695: break;
696:
697: default:
698: return (-1);
699: break;
700: }
701: } else {
702: sock->pending_iocp++;
703: sock->pending_send++;
704: }
705:
706: if (lpo != NULL)
707: return (0);
708: else
709: return (total_sent);
710: }
711:
712: static void
713: queue_receive_request(isc_socket_t *sock) {
714: DWORD Flags = 0;
715: DWORD NumBytes = 0;
716: int total_bytes = 0;
717: int Result;
718: int Error;
719: WSABUF iov[1];
720: IoCompletionInfo *lpo;
721: isc_result_t isc_result;
722:
723: /*
724: * If we already have a receive pending, do nothing.
725: */
726: if (sock->pending_recv > 0)
727: return;
728:
729: /*
730: * If no one is waiting, do nothing.
731: */
732: if (ISC_LIST_EMPTY(sock->recv_list))
733: return;
734:
735: INSIST(sock->recvbuf.remaining == 0);
736: INSIST(sock->fd != INVALID_SOCKET);
737:
738: iov[0].len = sock->recvbuf.len;
739: iov[0].buf = sock->recvbuf.base;
740:
741: lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
742: HEAP_ZERO_MEMORY,
743: sizeof(IoCompletionInfo));
744: RUNTIME_CHECK(lpo != NULL);
745: lpo->request_type = SOCKET_RECV;
746:
747: sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr);
748:
749: Error = 0;
750: Result = WSARecvFrom((SOCKET)sock->fd, iov, 1,
751: &NumBytes, &Flags,
752: (SOCKADDR *)&sock->recvbuf.from_addr,
753: &sock->recvbuf.from_addr_len,
754: (LPWSAOVERLAPPED)lpo, NULL);
755:
756: /* Check for errors. */
757: if (Result == SOCKET_ERROR) {
758: Error = WSAGetLastError();
759:
760: switch (Error) {
761: case WSA_IO_PENDING:
762: sock->pending_iocp++;
763: sock->pending_recv++;
764: break;
765:
766: default:
767: isc_result = isc__errno2result(Error);
768: if (isc_result == ISC_R_UNEXPECTED)
769: UNEXPECTED_ERROR(__FILE__, __LINE__,
770: "WSARecvFrom: Windows error code: %d, isc result %d",
771: Error, isc_result);
772: send_recvdone_abort(sock, isc_result);
773: break;
774: }
775: } else {
776: /*
777: * The recv() finished immediately, but we will still get
778: * a completion event. Rather than duplicate code, let
779: * that thread handle sending the data along its way.
780: */
781: sock->pending_iocp++;
782: sock->pending_recv++;
783: }
784:
785: socket_log(__LINE__, sock, NULL, IOEVENT,
786: isc_msgcat, ISC_MSGSET_SOCKET,
787: ISC_MSG_DOIORECV,
788: "queue_io_request: fd %d result %d error %d",
789: sock->fd, Result, Error);
790:
791: CONSISTENT(sock);
792: }
793:
794: static void
795: manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category,
796: isc_logmodule_t *module, int level, const char *fmt, ...)
797: {
798: char msgbuf[2048];
799: va_list ap;
800:
801: if (!isc_log_wouldlog(isc_lctx, level))
802: return;
803:
804: va_start(ap, fmt);
805: vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
806: va_end(ap);
807:
808: isc_log_write(isc_lctx, category, module, level,
809: "sockmgr %p: %s", sockmgr, msgbuf);
810: }
811:
812: static void
813: socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
814: isc_logcategory_t *category, isc_logmodule_t *module, int level,
815: isc_msgcat_t *msgcat, int msgset, int message,
816: const char *fmt, ...)
817: {
818: char msgbuf[2048];
819: char peerbuf[256];
820: va_list ap;
821:
822:
823: if (!isc_log_wouldlog(isc_lctx, level))
824: return;
825:
826: va_start(ap, fmt);
827: vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
828: va_end(ap);
829:
830: if (address == NULL) {
831: isc_log_iwrite(isc_lctx, category, module, level,
832: msgcat, msgset, message,
833: "socket %p line %d: %s", sock, lineno, msgbuf);
834: } else {
835: isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
836: isc_log_iwrite(isc_lctx, category, module, level,
837: msgcat, msgset, message,
838: "socket %p line %d peer %s: %s", sock, lineno,
839: peerbuf, msgbuf);
840: }
841:
842: }
843:
844: /*
845: * Make an fd SOCKET non-blocking.
846: */
847: static isc_result_t
848: make_nonblock(SOCKET fd) {
849: int ret;
850: unsigned long flags = 1;
851: char strbuf[ISC_STRERRORSIZE];
852:
853: /* Set the socket to non-blocking */
854: ret = ioctlsocket(fd, FIONBIO, &flags);
855:
856: if (ret == -1) {
857: isc__strerror(errno, strbuf, sizeof(strbuf));
858: UNEXPECTED_ERROR(__FILE__, __LINE__,
859: "ioctlsocket(%d, FIOBIO, %d): %s",
860: fd, flags, strbuf);
861:
862: return (ISC_R_UNEXPECTED);
863: }
864:
865: return (ISC_R_SUCCESS);
866: }
867:
868: /*
869: * Windows 2000 systems incorrectly cause UDP sockets using WASRecvFrom
870: * to not work correctly, returning a WSACONNRESET error when a WSASendTo
871: * fails with an "ICMP port unreachable" response and preventing the
872: * socket from using the WSARecvFrom in subsequent operations.
873: * The function below fixes this, but requires that Windows 2000
874: * Service Pack 2 or later be installed on the system. NT 4.0
875: * systems are not affected by this and work correctly.
876: * See Microsoft Knowledge Base Article Q263823 for details of this.
877: */
878: isc_result_t
879: connection_reset_fix(SOCKET fd) {
880: DWORD dwBytesReturned = 0;
881: BOOL bNewBehavior = FALSE;
882: DWORD status;
883:
884: if (isc_win32os_majorversion() < 5)
885: return (ISC_R_SUCCESS); /* NT 4.0 has no problem */
886:
887: /* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */
888: status = WSAIoctl(fd, SIO_UDP_CONNRESET, &bNewBehavior,
889: sizeof(bNewBehavior), NULL, 0,
890: &dwBytesReturned, NULL, NULL);
891: if (status != SOCKET_ERROR)
892: return (ISC_R_SUCCESS);
893: else {
894: UNEXPECTED_ERROR(__FILE__, __LINE__,
895: "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s",
896: isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
897: ISC_MSG_FAILED, "failed"));
898: return (ISC_R_UNEXPECTED);
899: }
900: }
901:
902: /*
903: * Construct an iov array and attach it to the msghdr passed in. This is
904: * the SEND constructor, which will use the used region of the buffer
905: * (if using a buffer list) or will use the internal region (if a single
906: * buffer I/O is requested).
907: *
908: * Nothing can be NULL, and the done event must list at least one buffer
909: * on the buffer linked list for this function to be meaningful.
910: */
911: static void
912: build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
913: struct msghdr *msg, char *cmsg, WSABUF *iov,
914: IoCompletionInfo *lpo)
915: {
916: unsigned int iovcount;
917: isc_buffer_t *buffer;
918: buflist_t *cpbuffer;
919: isc_region_t used;
920: size_t write_count;
921: size_t skip_count;
922:
923: memset(msg, 0, sizeof(*msg));
924:
925: memcpy(&msg->to_addr, &dev->address.type, dev->address.length);
926: msg->to_addr_len = dev->address.length;
927:
928: buffer = ISC_LIST_HEAD(dev->bufferlist);
929: write_count = 0;
930: iovcount = 0;
931:
932: /*
933: * Single buffer I/O? Skip what we've done so far in this region.
934: */
935: if (buffer == NULL) {
936: write_count = dev->region.length - dev->n;
937: cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
938: RUNTIME_CHECK(cpbuffer != NULL);
939: cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, write_count);
940: RUNTIME_CHECK(cpbuffer->buf != NULL);
941:
942: socket_log(__LINE__, sock, NULL, TRACE,
943: isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
944: "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
945: cpbuffer->buf, write_count);
946:
947: memcpy(cpbuffer->buf,(dev->region.base + dev->n), write_count);
948: cpbuffer->buflen = write_count;
949: ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link);
950: iov[0].buf = cpbuffer->buf;
951: iov[0].len = write_count;
952: iovcount = 1;
953:
954: goto config;
955: }
956:
957: /*
958: * Multibuffer I/O.
959: * Skip the data in the buffer list that we have already written.
960: */
961: skip_count = dev->n;
962: while (buffer != NULL) {
963: REQUIRE(ISC_BUFFER_VALID(buffer));
964: if (skip_count < isc_buffer_usedlength(buffer))
965: break;
966: skip_count -= isc_buffer_usedlength(buffer);
967: buffer = ISC_LIST_NEXT(buffer, link);
968: }
969:
970: while (buffer != NULL) {
971: INSIST(iovcount < MAXSCATTERGATHER_SEND);
972:
973: isc_buffer_usedregion(buffer, &used);
974:
975: if (used.length > 0) {
976: int uselen = used.length - skip_count;
977: cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
978: RUNTIME_CHECK(cpbuffer != NULL);
979: cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, uselen);
980: RUNTIME_CHECK(cpbuffer->buf != NULL);
981:
982: socket_log(__LINE__, sock, NULL, TRACE,
983: isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
984: "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
985: cpbuffer->buf, write_count);
986:
987: memcpy(cpbuffer->buf,(used.base + skip_count), uselen);
988: cpbuffer->buflen = uselen;
989: iov[iovcount].buf = cpbuffer->buf;
990: iov[iovcount].len = used.length - skip_count;
991: write_count += uselen;
992: skip_count = 0;
993: iovcount++;
994: }
995: buffer = ISC_LIST_NEXT(buffer, link);
996: }
997:
998: INSIST(skip_count == 0);
999:
1000: config:
1001: msg->msg_iov = iov;
1002: msg->msg_iovlen = iovcount;
1003: msg->msg_totallen = write_count;
1004: }
1005:
1006: static void
1007: set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock,
1008: isc_socketevent_t *dev)
1009: {
1010: if (sock->type == isc_sockettype_udp) {
1011: if (address != NULL)
1012: dev->address = *address;
1013: else
1014: dev->address = sock->address;
1015: } else if (sock->type == isc_sockettype_tcp) {
1016: INSIST(address == NULL);
1017: dev->address = sock->address;
1018: }
1019: }
1020:
1021: static void
1022: destroy_socketevent(isc_event_t *event) {
1023: isc_socketevent_t *ev = (isc_socketevent_t *)event;
1024:
1025: INSIST(ISC_LIST_EMPTY(ev->bufferlist));
1026:
1027: (ev->destroy)(event);
1028: }
1029:
1030: static isc_socketevent_t *
1031: allocate_socketevent(isc_socket_t *sock, isc_eventtype_t eventtype,
1032: isc_taskaction_t action, const void *arg)
1033: {
1034: isc_socketevent_t *ev;
1035:
1036: ev = (isc_socketevent_t *)isc_event_allocate(sock->manager->mctx,
1037: sock, eventtype,
1038: action, arg,
1039: sizeof(*ev));
1040: if (ev == NULL)
1041: return (NULL);
1042:
1043: ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set
1044: ISC_LINK_INIT(ev, ev_link);
1045: ISC_LIST_INIT(ev->bufferlist);
1046: ev->region.base = NULL;
1047: ev->n = 0;
1048: ev->offset = 0;
1049: ev->attributes = 0;
1050: ev->destroy = ev->ev_destroy;
1051: ev->ev_destroy = destroy_socketevent;
1052:
1053: return (ev);
1054: }
1055:
#if defined(ISC_SOCKET_DEBUG)
/*
 * Debugging aid: print the name and iov fields of 'msg', followed by one
 * line per scatter/gather entry, to standard output.
 */
static void
dump_msg(struct msghdr *msg, isc_socket_t *sock) {
	unsigned int idx = 0;

	printf("MSGHDR %p, Socket #: %u\n", msg, sock->fd);
	printf("\tname %p, namelen %d\n", msg->msg_name, msg->msg_namelen);
	printf("\tiov %p, iovlen %d\n", msg->msg_iov, msg->msg_iovlen);

	while (idx < (unsigned int)msg->msg_iovlen) {
		printf("\t\t%d\tbase %p, len %d\n", idx,
		       msg->msg_iov[idx].buf,
		       msg->msg_iov[idx].len);
		idx++;
	}
}
#endif
1070:
1071: /*
1072: * map the error code
1073: */
1074: int
1075: map_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno,
1076: char *errorstring, size_t bufsize) {
1077:
1078: int doreturn;
1079: switch (windows_errno) {
1080: case WSAECONNREFUSED:
1081: *isc_errno = ISC_R_CONNREFUSED;
1082: if (sock->connected)
1083: doreturn = DOIO_HARD;
1084: else
1085: doreturn = DOIO_SOFT;
1086: break;
1087: case WSAENETUNREACH:
1088: case ERROR_NETWORK_UNREACHABLE:
1089: *isc_errno = ISC_R_NETUNREACH;
1090: if (sock->connected)
1091: doreturn = DOIO_HARD;
1092: else
1093: doreturn = DOIO_SOFT;
1094: break;
1095: case ERROR_PORT_UNREACHABLE:
1096: case ERROR_HOST_UNREACHABLE:
1097: case WSAEHOSTUNREACH:
1098: *isc_errno = ISC_R_HOSTUNREACH;
1099: if (sock->connected)
1100: doreturn = DOIO_HARD;
1101: else
1102: doreturn = DOIO_SOFT;
1103: break;
1104: case WSAENETDOWN:
1105: *isc_errno = ISC_R_NETDOWN;
1106: if (sock->connected)
1107: doreturn = DOIO_HARD;
1108: else
1109: doreturn = DOIO_SOFT;
1110: break;
1111: case WSAEHOSTDOWN:
1112: *isc_errno = ISC_R_HOSTDOWN;
1113: if (sock->connected)
1114: doreturn = DOIO_HARD;
1115: else
1116: doreturn = DOIO_SOFT;
1117: break;
1118: case WSAEACCES:
1119: *isc_errno = ISC_R_NOPERM;
1120: if (sock->connected)
1121: doreturn = DOIO_HARD;
1122: else
1123: doreturn = DOIO_SOFT;
1124: break;
1125: case WSAECONNRESET:
1126: case WSAENETRESET:
1127: case WSAECONNABORTED:
1128: case WSAEDISCON:
1129: *isc_errno = ISC_R_CONNECTIONRESET;
1130: if (sock->connected)
1131: doreturn = DOIO_HARD;
1132: else
1133: doreturn = DOIO_SOFT;
1134: break;
1135: case WSAENOTCONN:
1136: *isc_errno = ISC_R_NOTCONNECTED;
1137: if (sock->connected)
1138: doreturn = DOIO_HARD;
1139: else
1140: doreturn = DOIO_SOFT;
1141: break;
1142: case ERROR_OPERATION_ABORTED:
1143: case ERROR_CONNECTION_ABORTED:
1144: case ERROR_REQUEST_ABORTED:
1145: *isc_errno = ISC_R_CONNECTIONRESET;
1146: doreturn = DOIO_HARD;
1147: break;
1148: case WSAENOBUFS:
1149: *isc_errno = ISC_R_NORESOURCES;
1150: doreturn = DOIO_HARD;
1151: break;
1152: case WSAEAFNOSUPPORT:
1153: *isc_errno = ISC_R_FAMILYNOSUPPORT;
1154: doreturn = DOIO_HARD;
1155: break;
1156: case WSAEADDRNOTAVAIL:
1157: *isc_errno = ISC_R_ADDRNOTAVAIL;
1158: doreturn = DOIO_HARD;
1159: break;
1160: case WSAEDESTADDRREQ:
1161: *isc_errno = ISC_R_BADADDRESSFORM;
1162: doreturn = DOIO_HARD;
1163: break;
1164: case ERROR_NETNAME_DELETED:
1165: *isc_errno = ISC_R_NETDOWN;
1166: doreturn = DOIO_HARD;
1167: break;
1168: default:
1169: *isc_errno = ISC_R_IOERROR;
1170: doreturn = DOIO_HARD;
1171: break;
1172: }
1173: if (doreturn == DOIO_HARD) {
1174: isc__strerror(windows_errno, errorstring, bufsize);
1175: }
1176: return (doreturn);
1177: }
1178:
1179: static void
1180: fill_recv(isc_socket_t *sock, isc_socketevent_t *dev) {
1181: isc_region_t r;
1182: int copylen;
1183: isc_buffer_t *buffer;
1184:
1185: INSIST(dev->n < dev->minimum);
1186: INSIST(sock->recvbuf.remaining > 0);
1187: INSIST(sock->pending_recv == 0);
1188:
1189: if (sock->type == isc_sockettype_udp) {
1190: dev->address.length = sock->recvbuf.from_addr_len;
1191: memcpy(&dev->address.type, &sock->recvbuf.from_addr,
1192: sock->recvbuf.from_addr_len);
1193: if (isc_sockaddr_getport(&dev->address) == 0) {
1194: if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1195: socket_log(__LINE__, sock, &dev->address, IOEVENT,
1196: isc_msgcat, ISC_MSGSET_SOCKET,
1197: ISC_MSG_ZEROPORT,
1198: "dropping source port zero packet");
1199: }
1200: sock->recvbuf.remaining = 0;
1201: return;
1202: }
1203: } else if (sock->type == isc_sockettype_tcp) {
1204: dev->address = sock->address;
1205: }
1206:
1207: /*
1208: * Run through the list of buffers we were given, and find the
1209: * first one with space. Once it is found, loop through, filling
1210: * the buffers as much as possible.
1211: */
1212: buffer = ISC_LIST_HEAD(dev->bufferlist);
1213: if (buffer != NULL) { // Multi-buffer receive
1214: while (buffer != NULL && sock->recvbuf.remaining > 0) {
1215: REQUIRE(ISC_BUFFER_VALID(buffer));
1216: if (isc_buffer_availablelength(buffer) > 0) {
1217: isc_buffer_availableregion(buffer, &r);
1218: copylen = min(r.length, sock->recvbuf.remaining);
1219: memcpy(r.base, sock->recvbuf.consume_position, copylen);
1220: sock->recvbuf.consume_position += copylen;
1221: sock->recvbuf.remaining -= copylen;
1222: isc_buffer_add(buffer, copylen);
1223: dev->n += copylen;
1224: }
1225: buffer = ISC_LIST_NEXT(buffer, link);
1226: }
1227: } else { // Single-buffer receive
1228: copylen = min(dev->region.length - dev->n, sock->recvbuf.remaining);
1229: memcpy(dev->region.base + dev->n, sock->recvbuf.consume_position, copylen);
1230: sock->recvbuf.consume_position += copylen;
1231: sock->recvbuf.remaining -= copylen;
1232: dev->n += copylen;
1233: }
1234:
1235: /*
1236: * UDP receives are all-consuming. That is, if we have 4k worth of
1237: * data in our receive buffer, and the caller only gave us
1238: * 1k of space, we will toss the remaining 3k of data. TCP
1239: * will keep the extra data around and use it for later requests.
1240: */
1241: if (sock->type == isc_sockettype_udp)
1242: sock->recvbuf.remaining = 0;
1243: }
1244:
1245: /*
1246: * Copy out as much data from the internal buffer to done events.
1247: * As each done event is filled, send it along its way.
1248: */
1249: static void
1250: completeio_recv(isc_socket_t *sock)
1251: {
1252: isc_socketevent_t *dev;
1253:
1254: /*
1255: * If we are in the process of filling our buffer, we cannot
1256: * touch it yet, so don't.
1257: */
1258: if (sock->pending_recv > 0)
1259: return;
1260:
1261: while (sock->recvbuf.remaining > 0 && !ISC_LIST_EMPTY(sock->recv_list)) {
1262: dev = ISC_LIST_HEAD(sock->recv_list);
1263:
1264: /*
1265: * See if we have sufficient data in our receive buffer
1266: * to handle this. If we do, copy out the data.
1267: */
1268: fill_recv(sock, dev);
1269:
1270: /*
1271: * Did we satisfy it?
1272: */
1273: if (dev->n >= dev->minimum) {
1274: dev->result = ISC_R_SUCCESS;
1275: send_recvdone_event(sock, &dev);
1276: }
1277: }
1278: }
1279:
1280: /*
1281: * Returns:
1282: * DOIO_SUCCESS The operation succeeded. dev->result contains
1283: * ISC_R_SUCCESS.
1284: *
1285: * DOIO_HARD A hard or unexpected I/O error was encountered.
1286: * dev->result contains the appropriate error.
1287: *
1288: * DOIO_SOFT A soft I/O error was encountered. No senddone
1289: * event was sent. The operation should be retried.
1290: *
1291: * No other return values are possible.
1292: */
1293: static int
1294: completeio_send(isc_socket_t *sock, isc_socketevent_t *dev,
1295: struct msghdr *messagehdr, int cc, int send_errno)
1296: {
1297: char addrbuf[ISC_SOCKADDR_FORMATSIZE];
1298: char strbuf[ISC_STRERRORSIZE];
1299:
1300: if (send_errno != 0) {
1301: if (SOFT_ERROR(send_errno))
1302: return (DOIO_SOFT);
1303:
1304: return (map_socket_error(sock, send_errno, &dev->result,
1305: strbuf, sizeof(strbuf)));
1306:
1307: /*
1308: * The other error types depend on whether or not the
1309: * socket is UDP or TCP. If it is UDP, some errors
1310: * that we expect to be fatal under TCP are merely
1311: * annoying, and are really soft errors.
1312: *
1313: * However, these soft errors are still returned as
1314: * a status.
1315: */
1316: isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf));
1317: isc__strerror(send_errno, strbuf, sizeof(strbuf));
1318: UNEXPECTED_ERROR(__FILE__, __LINE__, "completeio_send: %s: %s",
1319: addrbuf, strbuf);
1320: dev->result = isc__errno2result(send_errno);
1321: return (DOIO_HARD);
1322: }
1323:
1324: /*
1325: * If we write less than we expected, update counters, poke.
1326: */
1327: dev->n += cc;
1328: if (cc != messagehdr->msg_totallen)
1329: return (DOIO_SOFT);
1330:
1331: /*
1332: * Exactly what we wanted to write. We're done with this
1333: * entry. Post its completion event.
1334: */
1335: dev->result = ISC_R_SUCCESS;
1336: return (DOIO_SUCCESS);
1337: }
1338:
1339: static int
1340: startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes,
1341: int *send_errno)
1342: {
1343: char *cmsg = NULL;
1344: char strbuf[ISC_STRERRORSIZE];
1345: IoCompletionInfo *lpo;
1346: int status;
1347: struct msghdr *msghdr;
1348:
1349: lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
1350: HEAP_ZERO_MEMORY,
1351: sizeof(IoCompletionInfo));
1352: RUNTIME_CHECK(lpo != NULL);
1353: lpo->request_type = SOCKET_SEND;
1354: lpo->dev = dev;
1355: msghdr = &lpo->messagehdr;
1356: memset(msghdr, 0, sizeof(struct msghdr));
1357: ISC_LIST_INIT(lpo->bufferlist);
1358:
1359: build_msghdr_send(sock, dev, msghdr, cmsg, sock->iov, lpo);
1360:
1361: *nbytes = internal_sendmsg(sock, lpo, msghdr, 0, send_errno);
1362:
1363: if (*nbytes < 0) {
1364: /*
1365: * I/O has been initiated
1366: * completion will be through the completion port
1367: */
1368: if (PENDING_ERROR(*send_errno)) {
1369: status = DOIO_PENDING;
1370: goto done;
1371: }
1372:
1373: if (SOFT_ERROR(*send_errno)) {
1374: status = DOIO_SOFT;
1375: goto done;
1376: }
1377:
1378: /*
1379: * If we got this far then something is wrong
1380: */
1381: if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1382: isc__strerror(*send_errno, strbuf, sizeof(strbuf));
1383: socket_log(__LINE__, sock, NULL, IOEVENT,
1384: isc_msgcat, ISC_MSGSET_SOCKET,
1385: ISC_MSG_INTERNALSEND,
1386: "startio_send: internal_sendmsg(%d) %d "
1387: "bytes, err %d/%s",
1388: sock->fd, *nbytes, *send_errno, strbuf);
1389: }
1390: goto done;
1391: }
1392: dev->result = ISC_R_SUCCESS;
1393: status = DOIO_SOFT;
1394: done:
1395: _set_state(sock, SOCK_DATA);
1396: return (status);
1397: }
1398:
1399: static isc_result_t
1400: allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
1401: isc_socket_t **socketp) {
1402: isc_socket_t *sock;
1403: isc_result_t result;
1404:
1405: sock = isc_mem_get(manager->mctx, sizeof(*sock));
1406:
1407: if (sock == NULL)
1408: return (ISC_R_NOMEMORY);
1409:
1410: sock->magic = 0;
1411: sock->references = 0;
1412:
1413: sock->manager = manager;
1414: sock->type = type;
1415: sock->fd = INVALID_SOCKET;
1416:
1417: ISC_LINK_INIT(sock, link);
1418:
1419: /*
1420: * set up list of readers and writers to be initially empty
1421: */
1422: ISC_LIST_INIT(sock->recv_list);
1423: ISC_LIST_INIT(sock->send_list);
1424: ISC_LIST_INIT(sock->accept_list);
1425: sock->connect_ev = NULL;
1426: sock->pending_accept = 0;
1427: sock->pending_recv = 0;
1428: sock->pending_send = 0;
1429: sock->pending_iocp = 0;
1430: sock->listener = 0;
1431: sock->connected = 0;
1432: sock->pending_connect = 0;
1433: sock->bound = 0;
1434: memset(sock->name, 0, sizeof(sock->name)); // zero the name field
1435: _set_state(sock, SOCK_INITIALIZED);
1436:
1437: sock->recvbuf.len = 65536;
1438: sock->recvbuf.consume_position = sock->recvbuf.base;
1439: sock->recvbuf.remaining = 0;
1440: sock->recvbuf.base = isc_mem_get(manager->mctx, sock->recvbuf.len); // max buffer size
1441: if (sock->recvbuf.base == NULL) {
1442: sock->magic = 0;
1443: goto error;
1444: }
1445:
1446: /*
1447: * initialize the lock
1448: */
1449: result = isc_mutex_init(&sock->lock);
1450: if (result != ISC_R_SUCCESS) {
1451: sock->magic = 0;
1452: isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len);
1453: sock->recvbuf.base = NULL;
1454: goto error;
1455: }
1456:
1457: socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1458: "allocated");
1459:
1460: sock->magic = SOCKET_MAGIC;
1461: *socketp = sock;
1462:
1463: return (ISC_R_SUCCESS);
1464:
1465: error:
1466: isc_mem_put(manager->mctx, sock, sizeof(*sock));
1467:
1468: return (result);
1469: }
1470:
1471: /*
1472: * Verify that the socket state is consistent.
1473: */
1474: static void
1475: consistent(isc_socket_t *sock) {
1476:
1477: isc_socketevent_t *dev;
1478: isc_socket_newconnev_t *nev;
1479: unsigned int count;
1480: char *crash_reason;
1481: isc_boolean_t crash = ISC_FALSE;
1482:
1483: REQUIRE(sock->pending_iocp == sock->pending_recv + sock->pending_send
1484: + sock->pending_accept + sock->pending_connect);
1485:
1486: dev = ISC_LIST_HEAD(sock->send_list);
1487: count = 0;
1488: while (dev != NULL) {
1489: count++;
1490: dev = ISC_LIST_NEXT(dev, ev_link);
1491: }
1492: if (count > sock->pending_send) {
1493: crash = ISC_TRUE;
1494: crash_reason = "send_list > sock->pending_send";
1495: }
1496:
1497: nev = ISC_LIST_HEAD(sock->accept_list);
1498: count = 0;
1499: while (nev != NULL) {
1500: count++;
1501: nev = ISC_LIST_NEXT(nev, ev_link);
1502: }
1503: if (count > sock->pending_accept) {
1504: crash = ISC_TRUE;
1505: crash_reason = "send_list > sock->pending_send";
1506: }
1507:
1508: if (crash) {
1509: socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1510: ISC_MSG_DESTROYING, "SOCKET INCONSISTENT: %s",
1511: crash_reason);
1512: sock_dump(sock);
1513: INSIST(crash == ISC_FALSE);
1514: }
1515: }
1516:
1517: /*
1518: * Maybe free the socket.
1519: *
1520: * This function will verify tht the socket is no longer in use in any way,
1521: * either internally or externally. This is the only place where this
1522: * check is to be made; if some bit of code believes that IT is done with
1523: * the socket (e.g., some reference counter reaches zero), it should call
1524: * this function.
1525: *
1526: * When calling this function, the socket must be locked, and the manager
1527: * must be unlocked.
1528: *
1529: * When this function returns, *socketp will be NULL. No tricks to try
1530: * to hold on to this pointer are allowed.
1531: */
1532: static void
1533: maybe_free_socket(isc_socket_t **socketp, int lineno) {
1534: isc_socket_t *sock = *socketp;
1535: *socketp = NULL;
1536:
1537: INSIST(VALID_SOCKET(sock));
1538: CONSISTENT(sock);
1539:
1540: if (sock->pending_iocp > 0
1541: || sock->pending_recv > 0
1542: || sock->pending_send > 0
1543: || sock->pending_accept > 0
1544: || sock->references > 0
1545: || sock->pending_connect == 1
1546: || !ISC_LIST_EMPTY(sock->recv_list)
1547: || !ISC_LIST_EMPTY(sock->send_list)
1548: || !ISC_LIST_EMPTY(sock->accept_list)
1549: || sock->fd != INVALID_SOCKET) {
1550: UNLOCK(&sock->lock);
1551: return;
1552: }
1553: UNLOCK(&sock->lock);
1554:
1555: free_socket(&sock, lineno);
1556: }
1557:
1558: void
1559: free_socket(isc_socket_t **sockp, int lineno) {
1560: isc_socketmgr_t *manager;
1561: isc_socket_t *sock = *sockp;
1562: *sockp = NULL;
1563:
1564: manager = sock->manager;
1565:
1566: /*
1567: * Seems we can free the socket after all.
1568: */
1569: manager = sock->manager;
1570: socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1571: ISC_MSG_DESTROYING, "freeing socket line %d fd %d lock %p semaphore %p",
1572: lineno, sock->fd, &sock->lock, sock->lock.LockSemaphore);
1573:
1574: sock->magic = 0;
1575: DESTROYLOCK(&sock->lock);
1576:
1577: if (sock->recvbuf.base != NULL)
1578: isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len);
1579:
1580: LOCK(&manager->lock);
1581: if (ISC_LINK_LINKED(sock, link))
1582: ISC_LIST_UNLINK(manager->socklist, sock, link);
1583: isc_mem_put(manager->mctx, sock, sizeof(*sock));
1584:
1585: if (ISC_LIST_EMPTY(manager->socklist))
1586: SIGNAL(&manager->shutdown_ok);
1587: UNLOCK(&manager->lock);
1588: }
1589:
1590: /*
1591: * Create a new socket of protocol family 'pf' and type 'type' managed
1592: * by 'manager' (the switch below handles UDP and TCP). On success the
1593: * socket holds one reference, is linked into the manager's socket list
1594: * and is returned in '*socketp'; on failure it is freed again.
1595: */
1596: isc_result_t
1597: isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1598: isc_socket_t **socketp) {
1599: isc_socket_t *sock = NULL;
1600: isc_result_t result;
1601: #if defined(USE_CMSG)
1602: int on = 1;
1603: #endif
1604: #if defined(SO_RCVBUF)
1605: ISC_SOCKADDR_LEN_T optlen;
1606: int size;
1607: #endif
1608: int socket_errno;
1609: char strbuf[ISC_STRERRORSIZE];
1610:
1611: REQUIRE(VALID_MANAGER(manager));
1612: REQUIRE(socketp != NULL && *socketp == NULL);
1613: REQUIRE(type != isc_sockettype_fdwatch);
1614:
1615: result = allocate_socket(manager, type, &sock);
1616: if (result != ISC_R_SUCCESS)
1617: return (result);
1618:
1619: sock->pf = pf;
/*
 * Create the OS socket. A UDP socket additionally gets
 * connection_reset_fix() applied; if that fails the descriptor is
 * closed and the socket object is freed.
 */
1620: switch (type) {
1621: case isc_sockettype_udp:
1622: sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP);
1623: if (sock->fd != INVALID_SOCKET) {
1624: result = connection_reset_fix(sock->fd);
1625: if (result != ISC_R_SUCCESS) {
1626: socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1627: "closed %d %d %d con_reset_fix_failed",
1628: sock->pending_recv, sock->pending_send,
1629: sock->references);
1630: closesocket(sock->fd);
1631: _set_state(sock, SOCK_CLOSED);
1632: sock->fd = INVALID_SOCKET;
1633: free_socket(&sock, __LINE__);
1634: return (result);
1635: }
1636: }
1637: break;
1638: case isc_sockettype_tcp:
1639: sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP);
1640: break;
1641: }
1642:
1643: if (sock->fd == INVALID_SOCKET) {
/* Capture the Winsock error before free_socket() runs, then map it. */
1644: socket_errno = WSAGetLastError();
1645: free_socket(&sock, __LINE__);
1646:
1647: switch (socket_errno) {
1648: case WSAEMFILE:
1649: case WSAENOBUFS:
1650: return (ISC_R_NORESOURCES);
1651:
1652: case WSAEPROTONOSUPPORT:
1653: case WSAEPFNOSUPPORT:
1654: case WSAEAFNOSUPPORT:
1655: return (ISC_R_FAMILYNOSUPPORT);
1656:
1657: default:
1658: isc__strerror(socket_errno, strbuf, sizeof(strbuf));
1659: UNEXPECTED_ERROR(__FILE__, __LINE__,
1660: "socket() %s: %s",
1661: isc_msgcat_get(isc_msgcat,
1662: ISC_MSGSET_GENERAL,
1663: ISC_MSG_FAILED,
1664: "failed"),
1665: strbuf);
1666: return (ISC_R_UNEXPECTED);
1667: }
1668: }
1669:
1670: result = make_nonblock(sock->fd);
1671: if (result != ISC_R_SUCCESS) {
1672: socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1673: "closed %d %d %d make_nonblock_failed",
1674: sock->pending_recv, sock->pending_send,
1675: sock->references);
1676: closesocket(sock->fd);
1677: sock->fd = INVALID_SOCKET;
1678: free_socket(&sock, __LINE__);
1679: return (result);
1680: }
1681:
1682:
/*
 * UDP-only socket options. Failures of the IPv6 packet-info setsockopt()
 * calls are reported through UNEXPECTED_ERROR but do not fail the create;
 * the remaining setsockopt() results are deliberately ignored.
 */
1683: #if defined(USE_CMSG) || defined(SO_RCVBUF)
1684: if (type == isc_sockettype_udp) {
1685:
1686: #if defined(USE_CMSG)
1687: #if defined(ISC_PLATFORM_HAVEIPV6)
1688: #ifdef IPV6_RECVPKTINFO
1689: /* 2292bis */
1690: if ((pf == AF_INET6)
1691: && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO,
1692: (void *)&on, sizeof(on)) < 0)) {
1693: isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1694: UNEXPECTED_ERROR(__FILE__, __LINE__,
1695: "setsockopt(%d, IPV6_RECVPKTINFO) "
1696: "%s: %s", sock->fd,
1697: isc_msgcat_get(isc_msgcat,
1698: ISC_MSGSET_GENERAL,
1699: ISC_MSG_FAILED,
1700: "failed"),
1701: strbuf);
1702: }
1703: #else
1704: /* 2292 */
1705: if ((pf == AF_INET6)
1706: && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO,
1707: (void *)&on, sizeof(on)) < 0)) {
1708: isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1709: UNEXPECTED_ERROR(__FILE__, __LINE__,
1710: "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1711: sock->fd,
1712: isc_msgcat_get(isc_msgcat,
1713: ISC_MSGSET_GENERAL,
1714: ISC_MSG_FAILED,
1715: "failed"),
1716: strbuf);
1717: }
1718: #endif /* IPV6_RECVPKTINFO */
1719: #ifdef IPV6_USE_MIN_MTU /*2292bis, not too common yet*/
1720: /* use minimum MTU */
1721: if (pf == AF_INET6) {
1722: (void)setsockopt(sock->fd, IPPROTO_IPV6,
1723: IPV6_USE_MIN_MTU,
1724: (void *)&on, sizeof(on));
1725: }
1726: #endif
1727: #endif /* ISC_PLATFORM_HAVEIPV6 */
1728: #endif /* defined(USE_CMSG) */
1729:
1730: #if defined(SO_RCVBUF)
/* Raise SO_RCVBUF to RCVBUFSIZE when the current value is smaller. */
1731: optlen = sizeof(size);
1732: if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1733: (void *)&size, &optlen) >= 0 &&
1734: size < RCVBUFSIZE) {
1735: size = RCVBUFSIZE;
1736: (void)setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1737: (void *)&size, sizeof(size));
1738: }
1739: #endif
1740:
1741: }
1742: #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */
1743:
/* Mark the socket open and hand the first reference to the caller. */
1744: _set_state(sock, SOCK_OPEN);
1745: sock->references = 1;
1746: *socketp = sock;
1747:
1748: iocompletionport_update(sock);
1749:
1750: /*
1751: * Note we don't have to lock the socket like we normally would because
1752: * there are no external references to it yet.
1753: */
1754: LOCK(&manager->lock);
1755: ISC_LIST_APPEND(manager->socklist, sock, link);
1756: InterlockedIncrement(&manager->totalSockets);
1757: UNLOCK(&manager->lock);
1758:
1759: socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1760: ISC_MSG_CREATED, "created %u type %u", sock->fd, type);
1761:
1762: return (ISC_R_SUCCESS);
1763: }
1764:
1765: isc_result_t
1766: isc_socket_open(isc_socket_t *sock) {
1767: REQUIRE(VALID_SOCKET(sock));
1768: REQUIRE(sock->type != isc_sockettype_fdwatch);
1769:
1770: return (ISC_R_NOTIMPLEMENTED);
1771: }
1772:
1773: /*
1774: * Attach to a socket. Caller must explicitly detach when it is done.
1775: */
1776: void
1777: isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp) {
1778: REQUIRE(VALID_SOCKET(sock));
1779: REQUIRE(socketp != NULL && *socketp == NULL);
1780:
1781: LOCK(&sock->lock);
1782: CONSISTENT(sock);
1783: sock->references++;
1784: UNLOCK(&sock->lock);
1785:
1786: *socketp = sock;
1787: }
1788:
1789: /*
1790: * Dereference a socket. If this is the last reference to it, clean things
1791: * up by destroying the socket.
1792: */
1793: void
1794: isc_socket_detach(isc_socket_t **socketp) {
1795: isc_socket_t *sock;
1796: isc_boolean_t kill_socket = ISC_FALSE;
1797:
1798: REQUIRE(socketp != NULL);
1799: sock = *socketp;
1800: REQUIRE(VALID_SOCKET(sock));
1801: REQUIRE(sock->type != isc_sockettype_fdwatch);
1802:
1803: LOCK(&sock->lock);
1804: CONSISTENT(sock);
1805: REQUIRE(sock->references > 0);
1806: sock->references--;
1807:
1808: socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1809: "detach_socket %d %d %d",
1810: sock->pending_recv, sock->pending_send,
1811: sock->references);
1812:
1813: if (sock->references == 0 && sock->fd != INVALID_SOCKET) {
1814: closesocket(sock->fd);
1815: sock->fd = INVALID_SOCKET;
1816: _set_state(sock, SOCK_CLOSED);
1817: }
1818:
1819: maybe_free_socket(&sock, __LINE__);
1820:
1821: *socketp = NULL;
1822: }
1823:
1824: isc_result_t
1825: isc_socket_close(isc_socket_t *sock) {
1826: REQUIRE(VALID_SOCKET(sock));
1827: REQUIRE(sock->type != isc_sockettype_fdwatch);
1828:
1829: return (ISC_R_NOTIMPLEMENTED);
1830: }
1831:
1832: /*
1833: * Dequeue an item off the given socket's read queue, set the result code
1834: * in the done event to the one provided, and send it to the task it was
1835: * destined for.
1836: *
1837: * If the event to be sent is on a list, remove it before sending. If
1838: * asked to, send and detach from the task as well.
1839: *
1840: * Caller must have the socket locked if the event is attached to the socket.
1841: */
1842: static void
1843: send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1844: isc_task_t *task;
1845:
1846: task = (*dev)->ev_sender;
1847: (*dev)->ev_sender = sock;
1848:
1849: if (ISC_LINK_LINKED(*dev, ev_link))
1850: ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);
1851:
1852: if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1853: == ISC_SOCKEVENTATTR_ATTACHED)
1854: isc_task_sendanddetach(&task, (isc_event_t **)dev);
1855: else
1856: isc_task_send(task, (isc_event_t **)dev);
1857:
1858: CONSISTENT(sock);
1859: }
1860:
1861: /*
1862: * See comments for send_recvdone_event() above.
1863: */
1864: static void
1865: send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1866: isc_task_t *task;
1867:
1868: INSIST(dev != NULL && *dev != NULL);
1869:
1870: task = (*dev)->ev_sender;
1871: (*dev)->ev_sender = sock;
1872:
1873: if (ISC_LINK_LINKED(*dev, ev_link))
1874: ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);
1875:
1876: if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1877: == ISC_SOCKEVENTATTR_ATTACHED)
1878: isc_task_sendanddetach(&task, (isc_event_t **)dev);
1879: else
1880: isc_task_send(task, (isc_event_t **)dev);
1881:
1882: CONSISTENT(sock);
1883: }
1884:
1885: /*
1886: * See comments for send_recvdone_event() above.
1887: */
1888: static void
1889: send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev) {
1890: isc_task_t *task;
1891:
1892: INSIST(adev != NULL && *adev != NULL);
1893:
1894: task = (*adev)->ev_sender;
1895: (*adev)->ev_sender = sock;
1896:
1897: if (ISC_LINK_LINKED(*adev, ev_link))
1898: ISC_LIST_DEQUEUE(sock->accept_list, *adev, ev_link);
1899:
1900: isc_task_sendanddetach(&task, (isc_event_t **)adev);
1901:
1902: CONSISTENT(sock);
1903: }
1904:
1905: /*
1906: * See comments for send_recvdone_event() above.
1907: */
1908: static void
1909: send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev) {
1910: isc_task_t *task;
1911:
1912: INSIST(cdev != NULL && *cdev != NULL);
1913:
1914: task = (*cdev)->ev_sender;
1915: (*cdev)->ev_sender = sock;
1916:
1917: sock->connect_ev = NULL;
1918:
1919: isc_task_sendanddetach(&task, (isc_event_t **)cdev);
1920:
1921: CONSISTENT(sock);
1922: }
1923:
1924: /*
1925: * On entry to this function, the event delivered is the internal
1926: * readable event, and the first item on the accept_list should be
1927: * the done event we want to send. If the list is empty, this is a no-op,
1928: * so just close the new connection, unlock, and return.
1929: *
1930: * Note the socket is locked before entering here
1931: */
1932: static void
1933: internal_accept(isc_socket_t *sock, IoCompletionInfo *lpo, int accept_errno) {
1934: isc_socket_newconnev_t *adev;
1935: isc_result_t result = ISC_R_SUCCESS;
1936: isc_socket_t *nsock;
1937: struct sockaddr *localaddr;
1938: int localaddr_len = sizeof(*localaddr);
1939: struct sockaddr *remoteaddr;
1940: int remoteaddr_len = sizeof(*remoteaddr);
1941:
1942: INSIST(VALID_SOCKET(sock));
1943: LOCK(&sock->lock);
1944: CONSISTENT(sock);
1945:
1946: socket_log(__LINE__, sock, NULL, TRACE,
1947: isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
1948: "internal_accept called");
1949:
1950: INSIST(sock->listener);
1951:
1952: INSIST(sock->pending_iocp > 0);
1953: sock->pending_iocp--;
1954: INSIST(sock->pending_accept > 0);
1955: sock->pending_accept--;
1956:
1957: adev = lpo->adev;
1958:
1959: /*
1960: * If the event is no longer in the list we can just return.
1961: */
1962: if (!acceptdone_is_active(sock, adev))
1963: goto done;
1964:
1965: nsock = adev->newsocket;
1966:
1967: /*
1968: * Pull off the done event.
1969: */
1970: ISC_LIST_UNLINK(sock->accept_list, adev, ev_link);
1971:
1972: /*
1973: * Extract the addresses from the socket, copy them into the structure,
1974: * and return the new socket.
1975: */
1976: ISCGetAcceptExSockaddrs(lpo->acceptbuffer, 0,
1977: sizeof(SOCKADDR_STORAGE) + 16, sizeof(SOCKADDR_STORAGE) + 16,
1978: (LPSOCKADDR *)&localaddr, &localaddr_len,
1979: (LPSOCKADDR *)&remoteaddr, &remoteaddr_len);
1980: memcpy(&adev->address.type, remoteaddr, remoteaddr_len);
1981: adev->address.length = remoteaddr_len;
1982: nsock->address = adev->address;
1983: nsock->pf = adev->address.type.sa.sa_family;
1984:
1985: socket_log(__LINE__, nsock, &nsock->address, TRACE,
1986: isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
1987: "internal_accept parent %p", sock);
1988:
1989: result = make_nonblock(adev->newsocket->fd);
1990: INSIST(result == ISC_R_SUCCESS);
1991:
1992: INSIST(setsockopt(nsock->fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT,
1993: (char *)&sock->fd, sizeof(sock->fd)) == 0);
1994:
1995: /*
1996: * Hook it up into the manager.
1997: */
1998: nsock->bound = 1;
1999: nsock->connected = 1;
2000: _set_state(nsock, SOCK_OPEN);
2001:
2002: LOCK(&nsock->manager->lock);
2003: ISC_LIST_APPEND(nsock->manager->socklist, nsock, link);
2004: InterlockedIncrement(&nsock->manager->totalSockets);
2005: UNLOCK(&nsock->manager->lock);
2006:
2007: socket_log(__LINE__, sock, &nsock->address, CREATION,
2008: isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2009: "accepted_connection new_socket %p fd %d",
2010: nsock, nsock->fd);
2011:
2012: adev->result = result;
2013: send_acceptdone_event(sock, &adev);
2014:
2015: done:
2016: CONSISTENT(sock);
2017: UNLOCK(&sock->lock);
2018:
2019: HeapFree(hHeapHandle, 0, lpo->acceptbuffer);
2020: lpo->acceptbuffer = NULL;
2021: }
2022:
2023: /*
2024: * Called when a socket with a pending connect() finishes.
2025: * Note that the socket is locked before entering.
2026: */
2027: static void
2028: internal_connect(isc_socket_t *sock, IoCompletionInfo *lpo, int connect_errno) {
2029: isc_socket_connev_t *cdev;
2030: char strbuf[ISC_STRERRORSIZE];
2031:
2032: INSIST(VALID_SOCKET(sock));
2033:
2034: LOCK(&sock->lock);
2035:
2036: INSIST(sock->pending_iocp > 0);
2037: sock->pending_iocp--;
2038: INSIST(sock->pending_connect == 1);
2039: sock->pending_connect = 0;
2040:
2041: /*
2042: * Has this event been canceled?
2043: */
2044: cdev = lpo->cdev;
2045: if (!connectdone_is_active(sock, cdev)) {
2046: sock->pending_connect = 0;
2047: if (sock->fd != INVALID_SOCKET) {
2048: closesocket(sock->fd);
2049: sock->fd = INVALID_SOCKET;
2050: _set_state(sock, SOCK_CLOSED);
2051: }
2052: CONSISTENT(sock);
2053: UNLOCK(&sock->lock);
2054: return;
2055: }
2056:
2057: /*
2058: * Check possible Windows network event error status here.
2059: */
2060: if (connect_errno != 0) {
2061: /*
2062: * If the error is SOFT, just try again on this
2063: * fd and pretend nothing strange happened.
2064: */
2065: if (SOFT_ERROR(connect_errno) ||
2066: connect_errno == WSAEINPROGRESS) {
2067: sock->pending_connect = 1;
2068: CONSISTENT(sock);
2069: UNLOCK(&sock->lock);
2070: return;
2071: }
2072:
2073: /*
2074: * Translate other errors into ISC_R_* flavors.
2075: */
2076: switch (connect_errno) {
2077: #define ERROR_MATCH(a, b) case a: cdev->result = b; break;
2078: ERROR_MATCH(WSAEACCES, ISC_R_NOPERM);
2079: ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
2080: ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
2081: ERROR_MATCH(WSAECONNREFUSED, ISC_R_CONNREFUSED);
2082: ERROR_MATCH(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH);
2083: ERROR_MATCH(WSAEHOSTDOWN, ISC_R_HOSTDOWN);
2084: ERROR_MATCH(WSAENETUNREACH, ISC_R_NETUNREACH);
2085: ERROR_MATCH(WSAENETDOWN, ISC_R_NETDOWN);
2086: ERROR_MATCH(WSAENOBUFS, ISC_R_NORESOURCES);
2087: ERROR_MATCH(WSAECONNRESET, ISC_R_CONNECTIONRESET);
2088: ERROR_MATCH(WSAECONNABORTED, ISC_R_CONNECTIONRESET);
2089: ERROR_MATCH(WSAETIMEDOUT, ISC_R_TIMEDOUT);
2090: #undef ERROR_MATCH
2091: default:
2092: cdev->result = ISC_R_UNEXPECTED;
2093: isc__strerror(connect_errno, strbuf, sizeof(strbuf));
2094: UNEXPECTED_ERROR(__FILE__, __LINE__,
2095: "internal_connect: connect() %s",
2096: strbuf);
2097: }
2098: } else {
2099: INSIST(setsockopt(sock->fd, SOL_SOCKET, SO_UPDATE_CONNECT_CONTEXT, NULL, 0) == 0);
2100: cdev->result = ISC_R_SUCCESS;
2101: sock->connected = 1;
2102: socket_log(__LINE__, sock, &sock->address, IOEVENT,
2103: isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2104: "internal_connect: success");
2105: }
2106:
2107: send_connectdone_event(sock, &cdev);
2108:
2109: UNLOCK(&sock->lock);
2110: }
2111:
2112: /*
2113: * Loop through the socket, returning ISC_R_EOF for each done event pending.
2114: */
2115: static void
2116: send_recvdone_abort(isc_socket_t *sock, isc_result_t result) {
2117: isc_socketevent_t *dev;
2118:
2119: while (!ISC_LIST_EMPTY(sock->recv_list)) {
2120: dev = ISC_LIST_HEAD(sock->recv_list);
2121: dev->result = result;
2122: send_recvdone_event(sock, &dev);
2123: }
2124: }
2125:
2126: /*
2127: * Take the data we received in our private buffer, and if any recv() calls on
2128: * our list are satisfied, send the corresponding done event.
2129: *
2130: * If we need more data (there are still items on the recv_list after we consume all
2131: * our data) then arrange for another system recv() call to fill our buffers.
2132: */
2133: static void
2134: internal_recv(isc_socket_t *sock, int nbytes)
2135: {
2136: INSIST(VALID_SOCKET(sock));
2137:
2138: LOCK(&sock->lock);
2139: CONSISTENT(sock);
2140:
2141: socket_log(__LINE__, sock, NULL, IOEVENT,
2142: isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV,
2143: "internal_recv: %d bytes received", nbytes);
2144:
2145: /*
2146: * If we got here, the I/O operation succeeded. However, we might still have removed this
2147: * event from our notification list (or never placed it on it due to immediate completion.)
2148: * Handle the reference counting here, and handle the cancellation event just after.
2149: */
2150: INSIST(sock->pending_iocp > 0);
2151: sock->pending_iocp--;
2152: INSIST(sock->pending_recv > 0);
2153: sock->pending_recv--;
2154:
2155: /*
2156: * The only way we could have gotten here is that our I/O has successfully completed.
2157: * Update our pointers, and move on. The only odd case here is that we might not
2158: * have received enough data on a TCP stream to satisfy the minimum requirements. If
2159: * this is the case, we will re-issue the recv() call for what we need.
2160: *
2161: * We do check for a recv() of 0 bytes on a TCP stream. This means the remote end
2162: * has closed.
2163: */
2164: if (nbytes == 0 && sock->type == isc_sockettype_tcp) {
2165: send_recvdone_abort(sock, ISC_R_EOF);
2166: maybe_free_socket(&sock, __LINE__);
2167: return;
2168: }
2169: sock->recvbuf.remaining = nbytes;
2170: sock->recvbuf.consume_position = sock->recvbuf.base;
2171: completeio_recv(sock);
2172:
2173: /*
2174: * If there are more receivers waiting for data, queue another receive
2175: * here.
2176: */
2177: queue_receive_request(sock);
2178:
2179: /*
2180: * Unlock and/or destroy if we are the last thing this socket has left to do.
2181: */
2182: maybe_free_socket(&sock, __LINE__);
2183: }
2184:
2185: static void
2186: internal_send(isc_socket_t *sock, isc_socketevent_t *dev,
2187: struct msghdr *messagehdr, int nbytes, int send_errno, IoCompletionInfo *lpo)
2188: {
2189: buflist_t *buffer;
2190:
2191: /*
2192: * Find out what socket this is and lock it.
2193: */
2194: INSIST(VALID_SOCKET(sock));
2195:
2196: LOCK(&sock->lock);
2197: CONSISTENT(sock);
2198:
2199: socket_log(__LINE__, sock, NULL, IOEVENT,
2200: isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND,
2201: "internal_send: task got socket event %p", dev);
2202:
2203: buffer = ISC_LIST_HEAD(lpo->bufferlist);
2204: while (buffer != NULL) {
2205: ISC_LIST_DEQUEUE(lpo->bufferlist, buffer, link);
2206:
2207: socket_log(__LINE__, sock, NULL, TRACE,
2208: isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2209: "free_buffer %p %p", buffer, buffer->buf);
2210:
2211: HeapFree(hHeapHandle, 0, buffer->buf);
2212: HeapFree(hHeapHandle, 0, buffer);
2213: buffer = ISC_LIST_HEAD(lpo->bufferlist);
2214: }
2215:
2216: INSIST(sock->pending_iocp > 0);
2217: sock->pending_iocp--;
2218: INSIST(sock->pending_send > 0);
2219: sock->pending_send--;
2220:
2221: /* If the event is no longer in the list we can just return */
2222: if (!senddone_is_active(sock, dev))
2223: goto done;
2224:
2225: /*
2226: * Set the error code and send things on its way.
2227: */
2228: switch (completeio_send(sock, dev, messagehdr, nbytes, send_errno)) {
2229: case DOIO_SOFT:
2230: break;
2231: case DOIO_HARD:
2232: case DOIO_SUCCESS:
2233: send_senddone_event(sock, &dev);
2234: break;
2235: }
2236:
2237: done:
2238: maybe_free_socket(&sock, __LINE__);
2239: }
2240:
2241: /*
2242: * These return if the done event passed in is on the list (or for connect, is
2243: * the one we're waiting for. Using these ensures we will not double-send an
2244: * event.
2245: */
2246: static isc_boolean_t
2247: senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev)
2248: {
2249: isc_socketevent_t *ldev;
2250:
2251: ldev = ISC_LIST_HEAD(sock->send_list);
2252: while (ldev != NULL && ldev != dev)
2253: ldev = ISC_LIST_NEXT(ldev, ev_link);
2254:
2255: return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
2256: }
2257:
2258: static isc_boolean_t
2259: acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev)
2260: {
2261: isc_socket_newconnev_t *ldev;
2262:
2263: ldev = ISC_LIST_HEAD(sock->accept_list);
2264: while (ldev != NULL && ldev != dev)
2265: ldev = ISC_LIST_NEXT(ldev, ev_link);
2266:
2267: return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
2268: }
2269:
2270: static isc_boolean_t
2271: connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev)
2272: {
2273: return (sock->connect_ev == dev ? ISC_TRUE : ISC_FALSE);
2274: }
2275:
2276: /*
2277: * This is the I/O Completion Port Worker Function. It loops forever
2278: * waiting for I/O to complete and then forwards them for further
2279: * processing. There are a number of these in separate threads.
2280: */
2281: static isc_threadresult_t WINAPI
2282: SocketIoThread(LPVOID ThreadContext) {
2283: isc_socketmgr_t *manager = ThreadContext;
2284: BOOL bSuccess = FALSE;
2285: DWORD nbytes;
2286: IoCompletionInfo *lpo = NULL;
2287: isc_socket_t *sock = NULL;
2288: int request;
2289: struct msghdr *messagehdr = NULL;
2290: int errval;
2291: char strbuf[ISC_STRERRORSIZE];
2292: int errstatus;
2293:
2294: REQUIRE(VALID_MANAGER(manager));
2295:
2296: /*
2297: * Set the thread priority high enough so I/O will
2298: * preempt normal recv packet processing, but not
2299: * higher than the timer sync thread.
2300: */
2301: if (!SetThreadPriority(GetCurrentThread(),
2302: THREAD_PRIORITY_ABOVE_NORMAL)) {
2303: errval = GetLastError();
2304: isc__strerror(errval, strbuf, sizeof(strbuf));
2305: FATAL_ERROR(__FILE__, __LINE__,
2306: isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2307: ISC_MSG_FAILED,
2308: "Can't set thread priority: %s"),
2309: strbuf);
2310: }
2311:
2312: /*
2313: * Loop forever waiting on I/O Completions and then processing them
2314: */
2315: while (TRUE) {
2316: bSuccess = GetQueuedCompletionStatus(manager->hIoCompletionPort,
2317: &nbytes, (LPDWORD)&sock,
2318: (LPWSAOVERLAPPED *)&lpo,
2319: INFINITE);
2320: if (lpo == NULL) /* Received request to exit */
2321: break;
2322:
2323: REQUIRE(VALID_SOCKET(sock));
2324:
2325: request = lpo->request_type;
2326:
2327: errstatus = 0;
2328: if (!bSuccess) {
2329: isc_result_t isc_result;
2330:
2331: /*
2332: * Did the I/O operation complete?
2333: */
2334: errstatus = WSAGetLastError();
2335: isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__);
2336:
2337: LOCK(&sock->lock);
2338: CONSISTENT(sock);
2339: switch (request) {
2340: case SOCKET_RECV:
2341: INSIST(sock->pending_iocp > 0);
2342: sock->pending_iocp--;
2343: INSIST(sock->pending_recv > 0);
2344: sock->pending_recv--;
2345: send_recvdone_abort(sock, isc_result);
2346: if (isc_result == ISC_R_UNEXPECTED) {
2347: UNEXPECTED_ERROR(__FILE__, __LINE__,
2348: "SOCKET_RECV: Windows error code: %d, returning ISC error %d",
2349: errstatus, isc_result);
2350: }
2351: break;
2352:
2353: case SOCKET_SEND:
2354: INSIST(sock->pending_iocp > 0);
2355: sock->pending_iocp--;
2356: INSIST(sock->pending_send > 0);
2357: sock->pending_send--;
2358: if (senddone_is_active(sock, lpo->dev)) {
2359: lpo->dev->result = isc_result;
2360: socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2361: "canceled_send");
2362: send_senddone_event(sock, &lpo->dev);
2363: }
2364: break;
2365:
2366: case SOCKET_ACCEPT:
2367: INSIST(sock->pending_iocp > 0);
2368: sock->pending_iocp--;
2369: INSIST(sock->pending_accept > 0);
2370: sock->pending_accept--;
2371: if (acceptdone_is_active(sock, lpo->adev)) {
2372: closesocket(lpo->adev->newsocket->fd);
2373: lpo->adev->newsocket->fd = INVALID_SOCKET;
2374: lpo->adev->newsocket->references--;
2375: free_socket(&lpo->adev->newsocket, __LINE__);
2376: lpo->adev->result = isc_result;
2377: socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2378: "canceled_accept");
2379: send_acceptdone_event(sock, &lpo->adev);
2380: }
2381: break;
2382:
2383: case SOCKET_CONNECT:
2384: INSIST(sock->pending_iocp > 0);
2385: sock->pending_iocp--;
2386: INSIST(sock->pending_connect == 1);
2387: sock->pending_connect = 0;
2388: if (connectdone_is_active(sock, lpo->cdev)) {
2389: lpo->cdev->result = isc_result;
2390: socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2391: "canceled_connect");
2392: send_connectdone_event(sock, &lpo->cdev);
2393: }
2394: break;
2395: }
2396: maybe_free_socket(&sock, __LINE__);
2397:
2398: if (lpo != NULL)
2399: HeapFree(hHeapHandle, 0, lpo);
2400: continue;
2401: }
2402:
2403: messagehdr = &lpo->messagehdr;
2404:
2405: switch (request) {
2406: case SOCKET_RECV:
2407: internal_recv(sock, nbytes);
2408: break;
2409: case SOCKET_SEND:
2410: internal_send(sock, lpo->dev, messagehdr, nbytes, errstatus, lpo);
2411: break;
2412: case SOCKET_ACCEPT:
2413: internal_accept(sock, lpo, errstatus);
2414: break;
2415: case SOCKET_CONNECT:
2416: internal_connect(sock, lpo, errstatus);
2417: break;
2418: }
2419:
2420: if (lpo != NULL)
2421: HeapFree(hHeapHandle, 0, lpo);
2422: }
2423:
2424: /*
2425: * Exit Completion Port Thread
2426: */
2427: manager_log(manager, TRACE,
2428: isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2429: ISC_MSG_EXITING, "SocketIoThread exiting"));
2430: return ((isc_threadresult_t)0);
2431: }
2432:
2433: /*
2434: * Create a new socket manager.
2435: */
2436: isc_result_t
2437: isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
2438: return (isc_socketmgr_create2(mctx, managerp, 0));
2439: }
2440:
2441: isc_result_t
2442: isc_socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp,
2443: unsigned int maxsocks)
2444: {
2445: isc_socketmgr_t *manager;
2446: isc_result_t result;
2447:
2448: REQUIRE(managerp != NULL && *managerp == NULL);
2449:
2450: if (maxsocks != 0)
2451: return (ISC_R_NOTIMPLEMENTED);
2452:
2453: manager = isc_mem_get(mctx, sizeof(*manager));
2454: if (manager == NULL)
2455: return (ISC_R_NOMEMORY);
2456:
2457: InitSockets();
2458:
2459: manager->magic = SOCKET_MANAGER_MAGIC;
2460: manager->mctx = NULL;
2461: manager->stats = NULL;
2462: ISC_LIST_INIT(manager->socklist);
2463: result = isc_mutex_init(&manager->lock);
2464: if (result != ISC_R_SUCCESS) {
2465: isc_mem_put(mctx, manager, sizeof(*manager));
2466: return (result);
2467: }
2468: if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
2469: DESTROYLOCK(&manager->lock);
2470: isc_mem_put(mctx, manager, sizeof(*manager));
2471: UNEXPECTED_ERROR(__FILE__, __LINE__,
2472: "isc_condition_init() %s",
2473: isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2474: ISC_MSG_FAILED, "failed"));
2475: return (ISC_R_UNEXPECTED);
2476: }
2477:
2478: isc_mem_attach(mctx, &manager->mctx);
2479:
2480: iocompletionport_init(manager); /* Create the Completion Ports */
2481:
2482: manager->bShutdown = ISC_FALSE;
2483: manager->totalSockets = 0;
2484: manager->iocp_total = 0;
2485:
2486: *managerp = manager;
2487:
2488: return (ISC_R_SUCCESS);
2489: }
2490:
2491: isc_result_t
2492: isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) {
2493: REQUIRE(VALID_MANAGER(manager));
2494: REQUIRE(nsockp != NULL);
2495:
2496: return (ISC_R_NOTIMPLEMENTED);
2497: }
2498:
2499: void
2500: isc_socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats) {
2501: REQUIRE(VALID_MANAGER(manager));
2502: REQUIRE(ISC_LIST_EMPTY(manager->socklist));
2503: REQUIRE(manager->stats == NULL);
2504: REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);
2505:
2506: isc_stats_attach(stats, &manager->stats);
2507: }
2508:
2509: void
2510: isc_socketmgr_destroy(isc_socketmgr_t **managerp) {
2511: isc_socketmgr_t *manager;
2512: int i;
2513: isc_mem_t *mctx;
2514:
2515: /*
2516: * Destroy a socket manager.
2517: */
2518:
2519: REQUIRE(managerp != NULL);
2520: manager = *managerp;
2521: REQUIRE(VALID_MANAGER(manager));
2522:
2523: LOCK(&manager->lock);
2524:
2525: /*
2526: * Wait for all sockets to be destroyed.
2527: */
2528: while (!ISC_LIST_EMPTY(manager->socklist)) {
2529: manager_log(manager, CREATION,
2530: isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2531: ISC_MSG_SOCKETSREMAIN,
2532: "sockets exist"));
2533: WAIT(&manager->shutdown_ok, &manager->lock);
2534: }
2535:
2536: UNLOCK(&manager->lock);
2537:
2538: /*
2539: * Here, we need to had some wait code for the completion port
2540: * thread.
2541: */
2542: signal_iocompletionport_exit(manager);
2543: manager->bShutdown = ISC_TRUE;
2544:
2545: /*
2546: * Wait for threads to exit.
2547: */
2548: for (i = 0; i < manager->maxIOCPThreads; i++) {
2549: if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i],
2550: NULL) != ISC_R_SUCCESS)
2551: UNEXPECTED_ERROR(__FILE__, __LINE__,
2552: "isc_thread_join() for Completion Port %s",
2553: isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2554: ISC_MSG_FAILED, "failed"));
2555: }
2556: /*
2557: * Clean up.
2558: */
2559:
2560: CloseHandle(manager->hIoCompletionPort);
2561:
2562: (void)isc_condition_destroy(&manager->shutdown_ok);
2563:
2564: DESTROYLOCK(&manager->lock);
2565: if (manager->stats != NULL)
2566: isc_stats_detach(&manager->stats);
2567: manager->magic = 0;
2568: mctx= manager->mctx;
2569: isc_mem_put(mctx, manager, sizeof(*manager));
2570:
2571: isc_mem_detach(&mctx);
2572:
2573: *managerp = NULL;
2574: }
2575:
2576: static void
2577: queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev)
2578: {
2579: isc_task_t *ntask = NULL;
2580:
2581: isc_task_attach(task, &ntask);
2582: dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2583:
2584: /*
2585: * Enqueue the request.
2586: */
2587: INSIST(!ISC_LINK_LINKED(dev, ev_link));
2588: ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);
2589:
2590: socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2591: "queue_receive_event: event %p -> task %p",
2592: dev, ntask);
2593: }
2594:
2595: /*
2596: * Check the pending receive queue, and if we have data pending, give it to this
2597: * caller. If we have none, queue an I/O request. If this caller is not the first
2598: * on the list, then we will just queue this event and return.
2599: *
2600: * Caller must have the socket locked.
2601: */
2602: static isc_result_t
2603: socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2604: unsigned int flags)
2605: {
2606: int cc = 0;
2607: isc_task_t *ntask = NULL;
2608: isc_result_t result = ISC_R_SUCCESS;
2609: int recv_errno = 0;
2610:
2611: dev->ev_sender = task;
2612:
2613: if (sock->fd == INVALID_SOCKET)
2614: return (ISC_R_EOF);
2615:
2616: /*
2617: * Queue our event on the list of things to do. Call our function to
2618: * attempt to fill buffers as much as possible, and return done events.
2619: * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE
2620: * here and tell our caller that we could not satisfy it immediately.
2621: */
2622: queue_receive_event(sock, task, dev);
2623: if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2624: result = ISC_R_INPROGRESS;
2625:
2626: completeio_recv(sock);
2627:
2628: /*
2629: * If there are more receivers waiting for data, queue another receive
2630: * here. If the
2631: */
2632: queue_receive_request(sock);
2633:
2634: return (result);
2635: }
2636:
2637: isc_result_t
2638: isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2639: unsigned int minimum, isc_task_t *task,
2640: isc_taskaction_t action, const void *arg)
2641: {
2642: isc_socketevent_t *dev;
2643: isc_socketmgr_t *manager;
2644: unsigned int iocount;
2645: isc_buffer_t *buffer;
2646: isc_result_t ret;
2647:
2648: REQUIRE(VALID_SOCKET(sock));
2649: LOCK(&sock->lock);
2650: CONSISTENT(sock);
2651:
2652: /*
2653: * Make sure that the socket is not closed. XXXMLG change error here?
2654: */
2655: if (sock->fd == INVALID_SOCKET) {
2656: UNLOCK(&sock->lock);
2657: return (ISC_R_CONNREFUSED);
2658: }
2659:
2660: REQUIRE(buflist != NULL);
2661: REQUIRE(!ISC_LIST_EMPTY(*buflist));
2662: REQUIRE(task != NULL);
2663: REQUIRE(action != NULL);
2664:
2665: manager = sock->manager;
2666: REQUIRE(VALID_MANAGER(manager));
2667:
2668: iocount = isc_bufferlist_availablecount(buflist);
2669: REQUIRE(iocount > 0);
2670:
2671: INSIST(sock->bound);
2672:
2673: dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2674: if (dev == NULL) {
2675: UNLOCK(&sock->lock);
2676: return (ISC_R_NOMEMORY);
2677: }
2678:
2679: /*
2680: * UDP sockets are always partial read
2681: */
2682: if (sock->type == isc_sockettype_udp)
2683: dev->minimum = 1;
2684: else {
2685: if (minimum == 0)
2686: dev->minimum = iocount;
2687: else
2688: dev->minimum = minimum;
2689: }
2690:
2691: /*
2692: * Move each buffer from the passed in list to our internal one.
2693: */
2694: buffer = ISC_LIST_HEAD(*buflist);
2695: while (buffer != NULL) {
2696: ISC_LIST_DEQUEUE(*buflist, buffer, link);
2697: ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2698: buffer = ISC_LIST_HEAD(*buflist);
2699: }
2700:
2701: ret = socket_recv(sock, dev, task, 0);
2702:
2703: UNLOCK(&sock->lock);
2704: return (ret);
2705: }
2706:
2707: isc_result_t
2708: isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum,
2709: isc_task_t *task, isc_taskaction_t action, const void *arg)
2710: {
2711: isc_socketevent_t *dev;
2712: isc_socketmgr_t *manager;
2713: isc_result_t ret;
2714:
2715: REQUIRE(VALID_SOCKET(sock));
2716: LOCK(&sock->lock);
2717: CONSISTENT(sock);
2718:
2719: /*
2720: * make sure that the socket's not closed
2721: */
2722: if (sock->fd == INVALID_SOCKET) {
2723: UNLOCK(&sock->lock);
2724: return (ISC_R_CONNREFUSED);
2725: }
2726: REQUIRE(action != NULL);
2727:
2728: manager = sock->manager;
2729: REQUIRE(VALID_MANAGER(manager));
2730:
2731: INSIST(sock->bound);
2732:
2733: dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2734: if (dev == NULL) {
2735: UNLOCK(&sock->lock);
2736: return (ISC_R_NOMEMORY);
2737: }
2738:
2739: ret = isc_socket_recv2(sock, region, minimum, task, dev, 0);
2740: UNLOCK(&sock->lock);
2741: return (ret);
2742: }
2743:
2744: isc_result_t
2745: isc_socket_recv2(isc_socket_t *sock, isc_region_t *region,
2746: unsigned int minimum, isc_task_t *task,
2747: isc_socketevent_t *event, unsigned int flags)
2748: {
2749: isc_result_t ret;
2750:
2751: REQUIRE(VALID_SOCKET(sock));
2752: LOCK(&sock->lock);
2753: CONSISTENT(sock);
2754:
2755: event->result = ISC_R_UNEXPECTED;
2756: event->ev_sender = sock;
2757: /*
2758: * make sure that the socket's not closed
2759: */
2760: if (sock->fd == INVALID_SOCKET) {
2761: UNLOCK(&sock->lock);
2762: return (ISC_R_CONNREFUSED);
2763: }
2764:
2765: ISC_LIST_INIT(event->bufferlist);
2766: event->region = *region;
2767: event->n = 0;
2768: event->offset = 0;
2769: event->attributes = 0;
2770:
2771: /*
2772: * UDP sockets are always partial read.
2773: */
2774: if (sock->type == isc_sockettype_udp)
2775: event->minimum = 1;
2776: else {
2777: if (minimum == 0)
2778: event->minimum = region->length;
2779: else
2780: event->minimum = minimum;
2781: }
2782:
2783: ret = socket_recv(sock, event, task, flags);
2784: UNLOCK(&sock->lock);
2785: return (ret);
2786: }
2787:
2788: /*
2789: * Caller must have the socket locked.
2790: */
2791: static isc_result_t
2792: socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2793: isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
2794: unsigned int flags)
2795: {
2796: int io_state;
2797: int send_errno = 0;
2798: int cc = 0;
2799: isc_task_t *ntask = NULL;
2800: isc_result_t result = ISC_R_SUCCESS;
2801:
2802: dev->ev_sender = task;
2803:
2804: set_dev_address(address, sock, dev);
2805: if (pktinfo != NULL) {
2806: socket_log(__LINE__, sock, NULL, TRACE, isc_msgcat, ISC_MSGSET_SOCKET,
2807: ISC_MSG_PKTINFOPROVIDED,
2808: "pktinfo structure provided, ifindex %u (set to 0)",
2809: pktinfo->ipi6_ifindex);
2810:
2811: dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
2812: dev->pktinfo = *pktinfo;
2813: /*
2814: * Set the pktinfo index to 0 here, to let the kernel decide
2815: * what interface it should send on.
2816: */
2817: dev->pktinfo.ipi6_ifindex = 0;
2818: }
2819:
2820: io_state = startio_send(sock, dev, &cc, &send_errno);
2821: switch (io_state) {
2822: case DOIO_PENDING: /* I/O started. Nothing more to do */
2823: case DOIO_SOFT:
2824: /*
2825: * We couldn't send all or part of the request right now, so
2826: * queue it unless ISC_SOCKFLAG_NORETRY is set.
2827: */
2828: if ((flags & ISC_SOCKFLAG_NORETRY) == 0) {
2829: isc_task_attach(task, &ntask);
2830: dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2831:
2832: /*
2833: * Enqueue the request.
2834: */
2835: INSIST(!ISC_LINK_LINKED(dev, ev_link));
2836: ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);
2837:
2838: socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2839: "socket_send: event %p -> task %p",
2840: dev, ntask);
2841:
2842: if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2843: result = ISC_R_INPROGRESS;
2844: break;
2845: }
2846:
2847: case DOIO_SUCCESS:
2848: break;
2849: }
2850:
2851: return (result);
2852: }
2853:
2854: isc_result_t
2855: isc_socket_send(isc_socket_t *sock, isc_region_t *region,
2856: isc_task_t *task, isc_taskaction_t action, const void *arg)
2857: {
2858: /*
2859: * REQUIRE() checking is performed in isc_socket_sendto().
2860: */
2861: return (isc_socket_sendto(sock, region, task, action, arg, NULL,
2862: NULL));
2863: }
2864:
2865: isc_result_t
2866: isc_socket_sendto(isc_socket_t *sock, isc_region_t *region,
2867: isc_task_t *task, isc_taskaction_t action, const void *arg,
2868: isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
2869: {
2870: isc_socketevent_t *dev;
2871: isc_socketmgr_t *manager;
2872: isc_result_t ret;
2873:
2874: REQUIRE(VALID_SOCKET(sock));
2875: REQUIRE(sock->type != isc_sockettype_fdwatch);
2876:
2877: LOCK(&sock->lock);
2878: CONSISTENT(sock);
2879:
2880: /*
2881: * make sure that the socket's not closed
2882: */
2883: if (sock->fd == INVALID_SOCKET) {
2884: UNLOCK(&sock->lock);
2885: return (ISC_R_CONNREFUSED);
2886: }
2887: REQUIRE(region != NULL);
2888: REQUIRE(task != NULL);
2889: REQUIRE(action != NULL);
2890:
2891: manager = sock->manager;
2892: REQUIRE(VALID_MANAGER(manager));
2893:
2894: INSIST(sock->bound);
2895:
2896: dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
2897: if (dev == NULL) {
2898: UNLOCK(&sock->lock);
2899: return (ISC_R_NOMEMORY);
2900: }
2901: dev->region = *region;
2902:
2903: ret = socket_send(sock, dev, task, address, pktinfo, 0);
2904: UNLOCK(&sock->lock);
2905: return (ret);
2906: }
2907:
2908: isc_result_t
2909: isc_socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2910: isc_task_t *task, isc_taskaction_t action, const void *arg)
2911: {
2912: return (isc_socket_sendtov(sock, buflist, task, action, arg, NULL,
2913: NULL));
2914: }
2915:
2916: isc_result_t
2917: isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
2918: isc_task_t *task, isc_taskaction_t action, const void *arg,
2919: isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
2920: {
2921: isc_socketevent_t *dev;
2922: isc_socketmgr_t *manager;
2923: unsigned int iocount;
2924: isc_buffer_t *buffer;
2925: isc_result_t ret;
2926:
2927: REQUIRE(VALID_SOCKET(sock));
2928:
2929: LOCK(&sock->lock);
2930: CONSISTENT(sock);
2931:
2932: /*
2933: * make sure that the socket's not closed
2934: */
2935: if (sock->fd == INVALID_SOCKET) {
2936: UNLOCK(&sock->lock);
2937: return (ISC_R_CONNREFUSED);
2938: }
2939: REQUIRE(buflist != NULL);
2940: REQUIRE(!ISC_LIST_EMPTY(*buflist));
2941: REQUIRE(task != NULL);
2942: REQUIRE(action != NULL);
2943:
2944: manager = sock->manager;
2945: REQUIRE(VALID_MANAGER(manager));
2946:
2947: iocount = isc_bufferlist_usedcount(buflist);
2948: REQUIRE(iocount > 0);
2949:
2950: dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
2951: if (dev == NULL) {
2952: UNLOCK(&sock->lock);
2953: return (ISC_R_NOMEMORY);
2954: }
2955:
2956: /*
2957: * Move each buffer from the passed in list to our internal one.
2958: */
2959: buffer = ISC_LIST_HEAD(*buflist);
2960: while (buffer != NULL) {
2961: ISC_LIST_DEQUEUE(*buflist, buffer, link);
2962: ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2963: buffer = ISC_LIST_HEAD(*buflist);
2964: }
2965:
2966: ret = socket_send(sock, dev, task, address, pktinfo, 0);
2967: UNLOCK(&sock->lock);
2968: return (ret);
2969: }
2970:
2971: isc_result_t
2972: isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region,
2973: isc_task_t *task,
2974: isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
2975: isc_socketevent_t *event, unsigned int flags)
2976: {
2977: isc_result_t ret;
2978:
2979: REQUIRE(VALID_SOCKET(sock));
2980: LOCK(&sock->lock);
2981: CONSISTENT(sock);
2982:
2983: REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0);
2984: if ((flags & ISC_SOCKFLAG_NORETRY) != 0)
2985: REQUIRE(sock->type == isc_sockettype_udp);
2986: event->ev_sender = sock;
2987: event->result = ISC_R_UNEXPECTED;
2988: /*
2989: * make sure that the socket's not closed
2990: */
2991: if (sock->fd == INVALID_SOCKET) {
2992: UNLOCK(&sock->lock);
2993: return (ISC_R_CONNREFUSED);
2994: }
2995: ISC_LIST_INIT(event->bufferlist);
2996: event->region = *region;
2997: event->n = 0;
2998: event->offset = 0;
2999: event->attributes = 0;
3000:
3001: ret = socket_send(sock, event, task, address, pktinfo, flags);
3002: UNLOCK(&sock->lock);
3003: return (ret);
3004: }
3005:
3006: isc_result_t
3007: isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr,
3008: unsigned int options) {
3009: int bind_errno;
3010: char strbuf[ISC_STRERRORSIZE];
3011: int on = 1;
3012:
3013: REQUIRE(VALID_SOCKET(sock));
3014: LOCK(&sock->lock);
3015: CONSISTENT(sock);
3016:
3017: /*
3018: * make sure that the socket's not closed
3019: */
3020: if (sock->fd == INVALID_SOCKET) {
3021: UNLOCK(&sock->lock);
3022: return (ISC_R_CONNREFUSED);
3023: }
3024:
3025: INSIST(!sock->bound);
3026:
3027: if (sock->pf != sockaddr->type.sa.sa_family) {
3028: UNLOCK(&sock->lock);
3029: return (ISC_R_FAMILYMISMATCH);
3030: }
3031: /*
3032: * Only set SO_REUSEADDR when we want a specific port.
3033: */
3034: if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
3035: isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
3036: setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on,
3037: sizeof(on)) < 0) {
3038: UNEXPECTED_ERROR(__FILE__, __LINE__,
3039: "setsockopt(%d) %s", sock->fd,
3040: isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
3041: ISC_MSG_FAILED, "failed"));
3042: /* Press on... */
3043: }
3044: if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
3045: bind_errno = WSAGetLastError();
3046: UNLOCK(&sock->lock);
3047: switch (bind_errno) {
3048: case WSAEACCES:
3049: return (ISC_R_NOPERM);
3050: case WSAEADDRNOTAVAIL:
3051: return (ISC_R_ADDRNOTAVAIL);
3052: case WSAEADDRINUSE:
3053: return (ISC_R_ADDRINUSE);
3054: case WSAEINVAL:
3055: return (ISC_R_BOUND);
3056: default:
3057: isc__strerror(bind_errno, strbuf, sizeof(strbuf));
3058: UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
3059: strbuf);
3060: return (ISC_R_UNEXPECTED);
3061: }
3062: }
3063:
3064: socket_log(__LINE__, sock, sockaddr, TRACE,
3065: isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound");
3066: sock->bound = 1;
3067:
3068: UNLOCK(&sock->lock);
3069: return (ISC_R_SUCCESS);
3070: }
3071:
3072: isc_result_t
3073: isc_socket_filter(isc_socket_t *sock, const char *filter) {
3074: UNUSED(sock);
3075: UNUSED(filter);
3076:
3077: REQUIRE(VALID_SOCKET(sock));
3078: return (ISC_R_NOTIMPLEMENTED);
3079: }
3080:
3081: /*
3082: * Set up to listen on a given socket. We do this by creating an internal
3083: * event that will be dispatched when the socket has read activity. The
3084: * watcher will send the internal event to the task when there is a new
3085: * connection.
3086: *
3087: * Unlike in read, we don't preallocate a done event here. Every time there
3088: * is a new connection we'll have to allocate a new one anyway, so we might
3089: * as well keep things simple rather than having to track them.
3090: */
3091: isc_result_t
3092: isc_socket_listen(isc_socket_t *sock, unsigned int backlog) {
3093: char strbuf[ISC_STRERRORSIZE];
3094:
3095: REQUIRE(VALID_SOCKET(sock));
3096:
3097: LOCK(&sock->lock);
3098: CONSISTENT(sock);
3099:
3100: /*
3101: * make sure that the socket's not closed
3102: */
3103: if (sock->fd == INVALID_SOCKET) {
3104: UNLOCK(&sock->lock);
3105: return (ISC_R_CONNREFUSED);
3106: }
3107:
3108: REQUIRE(!sock->listener);
3109: REQUIRE(sock->bound);
3110: REQUIRE(sock->type == isc_sockettype_tcp);
3111:
3112: if (backlog == 0)
3113: backlog = SOMAXCONN;
3114:
3115: if (listen(sock->fd, (int)backlog) < 0) {
3116: UNLOCK(&sock->lock);
3117: isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3118:
3119: UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf);
3120:
3121: return (ISC_R_UNEXPECTED);
3122: }
3123:
3124: socket_log(__LINE__, sock, NULL, TRACE,
3125: isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "listening");
3126: sock->listener = 1;
3127: _set_state(sock, SOCK_LISTEN);
3128:
3129: UNLOCK(&sock->lock);
3130: return (ISC_R_SUCCESS);
3131: }
3132:
3133: /*
3134: * This should try to do aggressive accept() XXXMLG
3135: */
3136: isc_result_t
3137: isc_socket_accept(isc_socket_t *sock,
3138: isc_task_t *task, isc_taskaction_t action, const void *arg)
3139: {
3140: isc_socket_newconnev_t *adev;
3141: isc_socketmgr_t *manager;
3142: isc_task_t *ntask = NULL;
3143: isc_socket_t *nsock;
3144: isc_result_t result;
3145: IoCompletionInfo *lpo;
3146:
3147: REQUIRE(VALID_SOCKET(sock));
3148:
3149: manager = sock->manager;
3150: REQUIRE(VALID_MANAGER(manager));
3151:
3152: LOCK(&sock->lock);
3153: CONSISTENT(sock);
3154:
3155: /*
3156: * make sure that the socket's not closed
3157: */
3158: if (sock->fd == INVALID_SOCKET) {
3159: UNLOCK(&sock->lock);
3160: return (ISC_R_CONNREFUSED);
3161: }
3162:
3163: REQUIRE(sock->listener);
3164:
3165: /*
3166: * Sender field is overloaded here with the task we will be sending
3167: * this event to. Just before the actual event is delivered the
3168: * actual ev_sender will be touched up to be the socket.
3169: */
3170: adev = (isc_socket_newconnev_t *)
3171: isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN,
3172: action, arg, sizeof(*adev));
3173: if (adev == NULL) {
3174: UNLOCK(&sock->lock);
3175: return (ISC_R_NOMEMORY);
3176: }
3177: ISC_LINK_INIT(adev, ev_link);
3178:
3179: result = allocate_socket(manager, sock->type, &nsock);
3180: if (result != ISC_R_SUCCESS) {
3181: isc_event_free((isc_event_t **)&adev);
3182: UNLOCK(&sock->lock);
3183: return (result);
3184: }
3185:
3186: /*
3187: * AcceptEx() requires we pass in a socket.
3188: */
3189: nsock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP);
3190: if (nsock->fd == INVALID_SOCKET) {
3191: free_socket(&nsock, __LINE__);
3192: isc_event_free((isc_event_t **)&adev);
3193: UNLOCK(&sock->lock);
3194: return (ISC_R_FAILURE); // XXXMLG need real error message
3195: }
3196:
3197: /*
3198: * Attach to socket and to task.
3199: */
3200: isc_task_attach(task, &ntask);
3201: nsock->references++;
3202:
3203: adev->ev_sender = ntask;
3204: adev->newsocket = nsock;
3205: _set_state(nsock, SOCK_ACCEPT);
3206:
3207: /*
3208: * Queue io completion for an accept().
3209: */
3210: lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3211: HEAP_ZERO_MEMORY,
3212: sizeof(IoCompletionInfo));
3213: RUNTIME_CHECK(lpo != NULL);
3214: lpo->acceptbuffer = (void *)HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY,
3215: (sizeof(SOCKADDR_STORAGE) + 16) * 2);
3216: RUNTIME_CHECK(lpo->acceptbuffer != NULL);
3217:
3218: lpo->adev = adev;
3219: lpo->request_type = SOCKET_ACCEPT;
3220:
3221: ISCAcceptEx(sock->fd,
3222: nsock->fd, /* Accepted Socket */
3223: lpo->acceptbuffer, /* Buffer for initial Recv */
3224: 0, /* Length of Buffer */
3225: sizeof(SOCKADDR_STORAGE) + 16, /* Local address length + 16 */
3226: sizeof(SOCKADDR_STORAGE) + 16, /* Remote address lengh + 16 */
3227: (LPDWORD)&lpo->received_bytes, /* Bytes Recved */
3228: (LPOVERLAPPED)lpo /* Overlapped structure */
3229: );
3230: iocompletionport_update(nsock);
3231:
3232: socket_log(__LINE__, sock, NULL, TRACE,
3233: isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND,
3234: "accepting for nsock %p fd %d", nsock, nsock->fd);
3235:
3236: /*
3237: * Enqueue the event
3238: */
3239: ISC_LIST_ENQUEUE(sock->accept_list, adev, ev_link);
3240: sock->pending_accept++;
3241: sock->pending_iocp++;
3242:
3243: UNLOCK(&sock->lock);
3244: return (ISC_R_SUCCESS);
3245: }
3246:
3247: isc_result_t
3248: isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
3249: isc_task_t *task, isc_taskaction_t action, const void *arg)
3250: {
3251: char strbuf[ISC_STRERRORSIZE];
3252: isc_socket_connev_t *cdev;
3253: isc_task_t *ntask = NULL;
3254: isc_socketmgr_t *manager;
3255: IoCompletionInfo *lpo;
3256: int bind_errno;
3257:
3258: REQUIRE(VALID_SOCKET(sock));
3259: REQUIRE(addr != NULL);
3260: REQUIRE(task != NULL);
3261: REQUIRE(action != NULL);
3262:
3263: manager = sock->manager;
3264: REQUIRE(VALID_MANAGER(manager));
3265: REQUIRE(addr != NULL);
3266:
3267: if (isc_sockaddr_ismulticast(addr))
3268: return (ISC_R_MULTICAST);
3269:
3270: LOCK(&sock->lock);
3271: CONSISTENT(sock);
3272:
3273: /*
3274: * make sure that the socket's not closed
3275: */
3276: if (sock->fd == INVALID_SOCKET) {
3277: UNLOCK(&sock->lock);
3278: return (ISC_R_CONNREFUSED);
3279: }
3280:
3281: /*
3282: * Windows sockets won't connect unless the socket is bound.
3283: */
3284: if (!sock->bound) {
3285: isc_sockaddr_t any;
3286:
3287: isc_sockaddr_anyofpf(&any, isc_sockaddr_pf(addr));
3288: if (bind(sock->fd, &any.type.sa, any.length) < 0) {
3289: bind_errno = WSAGetLastError();
3290: UNLOCK(&sock->lock);
3291: switch (bind_errno) {
3292: case WSAEACCES:
3293: return (ISC_R_NOPERM);
3294: case WSAEADDRNOTAVAIL:
3295: return (ISC_R_ADDRNOTAVAIL);
3296: case WSAEADDRINUSE:
3297: return (ISC_R_ADDRINUSE);
3298: case WSAEINVAL:
3299: return (ISC_R_BOUND);
3300: default:
3301: isc__strerror(bind_errno, strbuf,
3302: sizeof(strbuf));
3303: UNEXPECTED_ERROR(__FILE__, __LINE__,
3304: "bind: %s", strbuf);
3305: return (ISC_R_UNEXPECTED);
3306: }
3307: }
3308: sock->bound = 1;
3309: }
3310:
3311: REQUIRE(!sock->pending_connect);
3312:
3313: cdev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock,
3314: ISC_SOCKEVENT_CONNECT,
3315: action, arg,
3316: sizeof(*cdev));
3317: if (cdev == NULL) {
3318: UNLOCK(&sock->lock);
3319: return (ISC_R_NOMEMORY);
3320: }
3321: ISC_LINK_INIT(cdev, ev_link);
3322:
3323: if (sock->type == isc_sockettype_tcp) {
3324: /*
3325: * Queue io completion for an accept().
3326: */
3327: lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3328: HEAP_ZERO_MEMORY,
3329: sizeof(IoCompletionInfo));
3330: lpo->cdev = cdev;
3331: lpo->request_type = SOCKET_CONNECT;
3332:
3333: sock->address = *addr;
3334: ISCConnectEx(sock->fd, &addr->type.sa, addr->length,
3335: NULL, 0, NULL, (LPOVERLAPPED)lpo);
3336:
3337: /*
3338: * Attach to task.
3339: */
3340: isc_task_attach(task, &ntask);
3341: cdev->ev_sender = ntask;
3342:
3343: sock->pending_connect = 1;
3344: _set_state(sock, SOCK_CONNECT);
3345:
3346: /*
3347: * Enqueue the request.
3348: */
3349: sock->connect_ev = cdev;
3350: sock->pending_iocp++;
3351: } else {
3352: WSAConnect(sock->fd, &addr->type.sa, addr->length, NULL, NULL, NULL, NULL);
3353: cdev->result = ISC_R_SUCCESS;
3354: isc_task_send(task, (isc_event_t **)&cdev);
3355: }
3356: CONSISTENT(sock);
3357: UNLOCK(&sock->lock);
3358:
3359: return (ISC_R_SUCCESS);
3360: }
3361:
3362: isc_result_t
3363: isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3364: isc_result_t result;
3365:
3366: REQUIRE(VALID_SOCKET(sock));
3367: REQUIRE(addressp != NULL);
3368:
3369: LOCK(&sock->lock);
3370: CONSISTENT(sock);
3371:
3372: /*
3373: * make sure that the socket's not closed
3374: */
3375: if (sock->fd == INVALID_SOCKET) {
3376: UNLOCK(&sock->lock);
3377: return (ISC_R_CONNREFUSED);
3378: }
3379:
3380: if (sock->connected) {
3381: *addressp = sock->address;
3382: result = ISC_R_SUCCESS;
3383: } else {
3384: result = ISC_R_NOTCONNECTED;
3385: }
3386:
3387: UNLOCK(&sock->lock);
3388:
3389: return (result);
3390: }
3391:
3392: isc_result_t
3393: isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3394: ISC_SOCKADDR_LEN_T len;
3395: isc_result_t result;
3396: char strbuf[ISC_STRERRORSIZE];
3397:
3398: REQUIRE(VALID_SOCKET(sock));
3399: REQUIRE(addressp != NULL);
3400:
3401: LOCK(&sock->lock);
3402: CONSISTENT(sock);
3403:
3404: /*
3405: * make sure that the socket's not closed
3406: */
3407: if (sock->fd == INVALID_SOCKET) {
3408: UNLOCK(&sock->lock);
3409: return (ISC_R_CONNREFUSED);
3410: }
3411:
3412: if (!sock->bound) {
3413: result = ISC_R_NOTBOUND;
3414: goto out;
3415: }
3416:
3417: result = ISC_R_SUCCESS;
3418:
3419: len = sizeof(addressp->type);
3420: if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) {
3421: isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3422: UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s",
3423: strbuf);
3424: result = ISC_R_UNEXPECTED;
3425: goto out;
3426: }
3427: addressp->length = (unsigned int)len;
3428:
3429: out:
3430: UNLOCK(&sock->lock);
3431:
3432: return (result);
3433: }
3434:
3435: /*
3436: * Run through the list of events on this socket, and cancel the ones
3437: * queued for task "task" of type "how". "how" is a bitmask.
3438: */
3439: void
3440: isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
3441:
3442: REQUIRE(VALID_SOCKET(sock));
3443:
3444: /*
3445: * Quick exit if there is nothing to do. Don't even bother locking
3446: * in this case.
3447: */
3448: if (how == 0)
3449: return;
3450:
3451: LOCK(&sock->lock);
3452: CONSISTENT(sock);
3453:
3454: /*
3455: * make sure that the socket's not closed
3456: */
3457: if (sock->fd == INVALID_SOCKET) {
3458: UNLOCK(&sock->lock);
3459: return;
3460: }
3461:
3462: /*
3463: * All of these do the same thing, more or less.
3464: * Each will:
3465: * o If the internal event is marked as "posted" try to
3466: * remove it from the task's queue. If this fails, mark it
3467: * as canceled instead, and let the task clean it up later.
3468: * o For each I/O request for that task of that type, post
3469: * its done event with status of "ISC_R_CANCELED".
3470: * o Reset any state needed.
3471: */
3472:
3473: if ((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) {
3474: isc_socketevent_t *dev;
3475: isc_socketevent_t *next;
3476: isc_task_t *current_task;
3477:
3478: dev = ISC_LIST_HEAD(sock->recv_list);
3479: while (dev != NULL) {
3480: current_task = dev->ev_sender;
3481: next = ISC_LIST_NEXT(dev, ev_link);
3482: if ((task == NULL) || (task == current_task)) {
3483: dev->result = ISC_R_CANCELED;
3484: send_recvdone_event(sock, &dev);
3485: }
3486: dev = next;
3487: }
3488: }
3489: how &= ~ISC_SOCKCANCEL_RECV;
3490:
3491: if ((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) {
3492: isc_socketevent_t *dev;
3493: isc_socketevent_t *next;
3494: isc_task_t *current_task;
3495:
3496: dev = ISC_LIST_HEAD(sock->send_list);
3497:
3498: while (dev != NULL) {
3499: current_task = dev->ev_sender;
3500: next = ISC_LIST_NEXT(dev, ev_link);
3501: if ((task == NULL) || (task == current_task)) {
3502: dev->result = ISC_R_CANCELED;
3503: send_senddone_event(sock, &dev);
3504: }
3505: dev = next;
3506: }
3507: }
3508: how &= ~ISC_SOCKCANCEL_SEND;
3509:
3510: if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT)
3511: && !ISC_LIST_EMPTY(sock->accept_list)) {
3512: isc_socket_newconnev_t *dev;
3513: isc_socket_newconnev_t *next;
3514: isc_task_t *current_task;
3515:
3516: dev = ISC_LIST_HEAD(sock->accept_list);
3517: while (dev != NULL) {
3518: current_task = dev->ev_sender;
3519: next = ISC_LIST_NEXT(dev, ev_link);
3520:
3521: if ((task == NULL) || (task == current_task)) {
3522:
3523: dev->newsocket->references--;
3524: closesocket(dev->newsocket->fd);
3525: dev->newsocket->fd = INVALID_SOCKET;
3526: free_socket(&dev->newsocket, __LINE__);
3527:
3528: dev->result = ISC_R_CANCELED;
3529: send_acceptdone_event(sock, &dev);
3530: }
3531:
3532: dev = next;
3533: }
3534: }
3535: how &= ~ISC_SOCKCANCEL_ACCEPT;
3536:
3537: /*
3538: * Connecting is not a list.
3539: */
3540: if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT)
3541: && sock->connect_ev != NULL) {
3542: isc_socket_connev_t *dev;
3543: isc_task_t *current_task;
3544:
3545: INSIST(sock->pending_connect);
3546:
3547: dev = sock->connect_ev;
3548: current_task = dev->ev_sender;
3549:
3550: if ((task == NULL) || (task == current_task)) {
3551: closesocket(sock->fd);
3552: sock->fd = INVALID_SOCKET;
3553: _set_state(sock, SOCK_CLOSED);
3554:
3555: sock->connect_ev = NULL;
3556: dev->result = ISC_R_CANCELED;
3557: send_connectdone_event(sock, &dev);
3558: }
3559: }
3560: how &= ~ISC_SOCKCANCEL_CONNECT;
3561:
3562: maybe_free_socket(&sock, __LINE__);
3563: }
3564:
3565: isc_sockettype_t
3566: isc_socket_gettype(isc_socket_t *sock) {
3567: isc_sockettype_t type;
3568:
3569: REQUIRE(VALID_SOCKET(sock));
3570:
3571: LOCK(&sock->lock);
3572:
3573: /*
3574: * make sure that the socket's not closed
3575: */
3576: if (sock->fd == INVALID_SOCKET) {
3577: UNLOCK(&sock->lock);
3578: return (ISC_R_CONNREFUSED);
3579: }
3580:
3581: type = sock->type;
3582: UNLOCK(&sock->lock);
3583: return (type);
3584: }
3585:
3586: isc_boolean_t
3587: isc_socket_isbound(isc_socket_t *sock) {
3588: isc_boolean_t val;
3589:
3590: REQUIRE(VALID_SOCKET(sock));
3591:
3592: LOCK(&sock->lock);
3593: CONSISTENT(sock);
3594:
3595: /*
3596: * make sure that the socket's not closed
3597: */
3598: if (sock->fd == INVALID_SOCKET) {
3599: UNLOCK(&sock->lock);
3600: return (ISC_FALSE);
3601: }
3602:
3603: val = ((sock->bound) ? ISC_TRUE : ISC_FALSE);
3604: UNLOCK(&sock->lock);
3605:
3606: return (val);
3607: }
3608:
3609: void
3610: isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) {
3611: #if defined(IPV6_V6ONLY)
3612: int onoff = yes ? 1 : 0;
3613: #else
3614: UNUSED(yes);
3615: #endif
3616:
3617: REQUIRE(VALID_SOCKET(sock));
3618:
3619: #ifdef IPV6_V6ONLY
3620: if (sock->pf == AF_INET6) {
3621: (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY,
3622: (void *)&onoff, sizeof(onoff));
3623: }
3624: #endif
3625: }
3626:
3627: void
3628: isc_socket_cleanunix(isc_sockaddr_t *addr, isc_boolean_t active) {
3629: UNUSED(addr);
3630: UNUSED(active);
3631: }
3632:
3633: isc_result_t
3634: isc_socket_permunix(isc_sockaddr_t *addr, isc_uint32_t perm,
3635: isc_uint32_t owner, isc_uint32_t group)
3636: {
3637: UNUSED(addr);
3638: UNUSED(perm);
3639: UNUSED(owner);
3640: UNUSED(group);
3641: return (ISC_R_NOTIMPLEMENTED);
3642: }
3643:
3644: void
3645: isc_socket_setname(isc_socket_t *socket, const char *name, void *tag) {
3646:
3647: /*
3648: * Name 'socket'.
3649: */
3650:
3651: REQUIRE(VALID_SOCKET(socket));
3652:
3653: LOCK(&socket->lock);
3654: memset(socket->name, 0, sizeof(socket->name));
3655: strncpy(socket->name, name, sizeof(socket->name) - 1);
3656: socket->tag = tag;
3657: UNLOCK(&socket->lock);
3658: }
3659:
3660: const char *
3661: isc_socket_getname(isc_socket_t *socket) {
3662: return (socket->name);
3663: }
3664:
3665: void *
3666: isc_socket_gettag(isc_socket_t *socket) {
3667: return (socket->tag);
3668: }
3669:
3670: void
3671: isc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) {
3672: UNUSED(manager);
3673: UNUSED(reserved);
3674: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>