/src/backend/port/win32/socket.c
C | 677 lines | 467 code | 84 blank | 126 comment | 129 complexity | 6fb12f3bdefbaf5d80a7d879e0387acc MD5 | raw file
Possible License(s): AGPL-3.0
- /*-------------------------------------------------------------------------
- *
- * socket.c
- * Microsoft Windows Win32 Socket Functions
- *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
- *
- * IDENTIFICATION
- * src/backend/port/win32/socket.c
- *
- *-------------------------------------------------------------------------
- */
- #include "postgres.h"
/*
 * pgwin32_noblock
 *
 * When nonzero, pgwin32_recv() and pgwin32_send() return EWOULDBLOCK
 * instead of waiting when the operation cannot complete immediately.
 *
 * The emulation layer always puts the underlying socket into non-blocking
 * mode so that signals can be delivered, which means the socket's own mode
 * cannot be used to express "the caller really wants non-blocking I/O";
 * this separate flag carries that request.
 *
 * The flag is global: it affects *every* socket operation while set, so
 * callers should set it only for very short periods of time.
 */
int			pgwin32_noblock = 0;
- #undef socket
- #undef accept
- #undef connect
- #undef select
- #undef recv
- #undef send
- /*
- * Blocking socket functions implemented so they listen on both
- * the socket and the signal event, required for signal handling.
- */
/*
 * TranslateSocketError
 *
 * Fetch the calling thread's most recent Winsock error code and store the
 * closest matching value in errno.  Codes with no specific mapping are
 * reported with a NOTICE and mapped to EINVAL.
 */
static void
TranslateSocketError(void)
{
	int			wsa_code = WSAGetLastError();

	switch (wsa_code)
	{
			/* Codes that have a direct errno counterpart */
		case WSAEINTR:
			errno = EINTR;
			break;
		case WSAEACCES:
			errno = EACCES;
			break;
		case WSAEMFILE:
			errno = EMFILE;
			break;
		case WSAEWOULDBLOCK:
			errno = EWOULDBLOCK;
			break;
		case WSAENOTSOCK:
			errno = EBADFD;
			break;
		case WSAEOPNOTSUPP:
			errno = EOPNOTSUPP;
			break;
		case WSAEAFNOSUPPORT:
			errno = EAFNOSUPPORT;
			break;
		case WSAENOBUFS:
			errno = ENOBUFS;
			break;
		case WSAECONNREFUSED:
			errno = ECONNREFUSED;
			break;

			/* Protocol mismatches are folded into one value */
		case WSAEPROTOTYPE:
		case WSAEPROTONOSUPPORT:
			errno = EPROTONOSUPPORT;
			break;

			/* Connection gone or never established */
		case WSAENOTCONN:
		case WSAENETRESET:
		case WSAECONNRESET:
		case WSAECONNABORTED:
		case WSAESHUTDOWN:
		case WSAEDISCON:
			errno = ECONNREFUSED;	/* ENOTCONN? */
			break;

			/* Everything that boils down to "bad call or bad environment" */
		case WSANOTINITIALISED:
		case WSAENETDOWN:
		case WSAEINPROGRESS:
		case WSAEINVAL:
		case WSAEFAULT:
		case WSAEMSGSIZE:
		case WSAESOCKTNOSUPPORT:
		case WSAEINVALIDPROVIDER:
		case WSAEINVALIDPROCTABLE:
			errno = EINVAL;
			break;

		default:
			ereport(NOTICE,
					(errmsg_internal("unrecognized win32 socket error code: %d", wsa_code)));
			errno = EINVAL;
	}
}
/*
 * pgwin32_poll_signals
 *
 * If any unblocked signals are queued, dispatch them, set errno to EINTR
 * and return 1.  Otherwise return 0 and leave errno alone.
 */
static int
pgwin32_poll_signals(void)
{
	/* Nothing pending: nothing to do */
	if (!UNBLOCKED_SIGNAL_QUEUE())
		return 0;

	pgwin32_dispatch_queued_signals();
	errno = EINTR;
	return 1;
}
- static int
- isDataGram(SOCKET s)
- {
- int type;
- int typelen = sizeof(type);
- if (getsockopt(s, SOL_SOCKET, SO_TYPE, (char *) &type, &typelen))
- return 1;
- return (type == SOCK_DGRAM) ? 1 : 0;
- }
- int
- pgwin32_waitforsinglesocket(SOCKET s, int what, int timeout)
- {
- static HANDLE waitevent = INVALID_HANDLE_VALUE;
- static SOCKET current_socket = INVALID_SOCKET;
- static int isUDP = 0;
- HANDLE events[2];
- int r;
- /* Create an event object just once and use it on all future calls */
- if (waitevent == INVALID_HANDLE_VALUE)
- {
- waitevent = CreateEvent(NULL, TRUE, FALSE, NULL);
- if (waitevent == INVALID_HANDLE_VALUE)
- ereport(ERROR,
- (errmsg_internal("could not create socket waiting event: error code %lu", GetLastError())));
- }
- else if (!ResetEvent(waitevent))
- ereport(ERROR,
- (errmsg_internal("could not reset socket waiting event: error code %lu", GetLastError())));
- /*
- * Track whether socket is UDP or not. (NB: most likely, this is both
- * useless and wrong; there is no reason to think that the behavior of
- * WSAEventSelect is different for TCP and UDP.)
- */
- if (current_socket != s)
- isUDP = isDataGram(s);
- current_socket = s;
- /*
- * Attach event to socket. NOTE: we must detach it again before
- * returning, since other bits of code may try to attach other events to
- * the socket.
- */
- if (WSAEventSelect(s, waitevent, what) != 0)
- {
- TranslateSocketError();
- return 0;
- }
- events[0] = pgwin32_signal_event;
- events[1] = waitevent;
- /*
- * Just a workaround of unknown locking problem with writing in UDP socket
- * under high load: Client's pgsql backend sleeps infinitely in
- * WaitForMultipleObjectsEx, pgstat process sleeps in pgwin32_select().
- * So, we will wait with small timeout(0.1 sec) and if socket is still
- * blocked, try WSASend (see comments in pgwin32_select) and wait again.
- */
- if ((what & FD_WRITE) && isUDP)
- {
- for (;;)
- {
- r = WaitForMultipleObjectsEx(2, events, FALSE, 100, TRUE);
- if (r == WAIT_TIMEOUT)
- {
- char c;
- WSABUF buf;
- DWORD sent;
- buf.buf = &c;
- buf.len = 0;
- r = WSASend(s, &buf, 1, &sent, 0, NULL, NULL);
- if (r == 0) /* Completed - means things are fine! */
- {
- WSAEventSelect(s, NULL, 0);
- return 1;
- }
- else if (WSAGetLastError() != WSAEWOULDBLOCK)
- {
- TranslateSocketError();
- WSAEventSelect(s, NULL, 0);
- return 0;
- }
- }
- else
- break;
- }
- }
- else
- r = WaitForMultipleObjectsEx(2, events, FALSE, timeout, TRUE);
- WSAEventSelect(s, NULL, 0);
- if (r == WAIT_OBJECT_0 || r == WAIT_IO_COMPLETION)
- {
- pgwin32_dispatch_queued_signals();
- errno = EINTR;
- return 0;
- }
- if (r == WAIT_OBJECT_0 + 1)
- return 1;
- if (r == WAIT_TIMEOUT)
- {
- errno = EWOULDBLOCK;
- return 0;
- }
- ereport(ERROR,
- (errmsg_internal("unrecognized return value from WaitForMultipleObjects: %d (error code %lu)", r, GetLastError())));
- return 0;
- }
- /*
- * Create a socket, setting it to overlapped and non-blocking
- */
- SOCKET
- pgwin32_socket(int af, int type, int protocol)
- {
- SOCKET s;
- unsigned long on = 1;
- s = WSASocket(af, type, protocol, NULL, 0, WSA_FLAG_OVERLAPPED);
- if (s == INVALID_SOCKET)
- {
- TranslateSocketError();
- return INVALID_SOCKET;
- }
- if (ioctlsocket(s, FIONBIO, &on))
- {
- TranslateSocketError();
- return INVALID_SOCKET;
- }
- errno = 0;
- return s;
- }
- SOCKET
- pgwin32_accept(SOCKET s, struct sockaddr * addr, int *addrlen)
- {
- SOCKET rs;
- /*
- * Poll for signals, but don't return with EINTR, since we don't handle
- * that in pqcomm.c
- */
- pgwin32_poll_signals();
- rs = WSAAccept(s, addr, addrlen, NULL, 0);
- if (rs == INVALID_SOCKET)
- {
- TranslateSocketError();
- return INVALID_SOCKET;
- }
- return rs;
- }
- /* No signal delivery during connect. */
- int
- pgwin32_connect(SOCKET s, const struct sockaddr * addr, int addrlen)
- {
- int r;
- r = WSAConnect(s, addr, addrlen, NULL, NULL, NULL, NULL);
- if (r == 0)
- return 0;
- if (WSAGetLastError() != WSAEWOULDBLOCK)
- {
- TranslateSocketError();
- return -1;
- }
- while (pgwin32_waitforsinglesocket(s, FD_CONNECT, INFINITE) == 0)
- {
- /* Loop endlessly as long as we are just delivering signals */
- }
- return 0;
- }
- int
- pgwin32_recv(SOCKET s, char *buf, int len, int f)
- {
- WSABUF wbuf;
- int r;
- DWORD b;
- DWORD flags = f;
- int n;
- if (pgwin32_poll_signals())
- return -1;
- wbuf.len = len;
- wbuf.buf = buf;
- r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
- if (r != SOCKET_ERROR && b > 0)
- /* Read succeeded right away */
- return b;
- if (r == SOCKET_ERROR &&
- WSAGetLastError() != WSAEWOULDBLOCK)
- {
- TranslateSocketError();
- return -1;
- }
- if (pgwin32_noblock)
- {
- /*
- * No data received, and we are in "emulated non-blocking mode", so
- * return indicating that we'd block if we were to continue.
- */
- errno = EWOULDBLOCK;
- return -1;
- }
- /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */
- for (n = 0; n < 5; n++)
- {
- if (pgwin32_waitforsinglesocket(s, FD_READ | FD_CLOSE | FD_ACCEPT,
- INFINITE) == 0)
- return -1; /* errno already set */
- r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
- if (r == SOCKET_ERROR)
- {
- if (WSAGetLastError() == WSAEWOULDBLOCK)
- {
- /*
- * There seem to be cases on win2k (at least) where WSARecv
- * can return WSAEWOULDBLOCK even when
- * pgwin32_waitforsinglesocket claims the socket is readable.
- * In this case, just sleep for a moment and try again. We try
- * up to 5 times - if it fails more than that it's not likely
- * to ever come back.
- */
- pg_usleep(10000);
- continue;
- }
- TranslateSocketError();
- return -1;
- }
- return b;
- }
- ereport(NOTICE,
- (errmsg_internal("could not read from ready socket (after retries)")));
- errno = EWOULDBLOCK;
- return -1;
- }
- /*
- * The second argument to send() is defined by SUS to be a "const void *"
- * and so we use the same signature here to keep compilers happy when
- * handling callers.
- *
- * But the buf member of a WSABUF struct is defined as "char *", so we cast
- * the second argument to that here when assigning it, also to keep compilers
- * happy.
- */
- int
- pgwin32_send(SOCKET s, const void *buf, int len, int flags)
- {
- WSABUF wbuf;
- int r;
- DWORD b;
- if (pgwin32_poll_signals())
- return -1;
- wbuf.len = len;
- wbuf.buf = (char *) buf;
- /*
- * Readiness of socket to send data to UDP socket may be not true: socket
- * can become busy again! So loop until send or error occurs.
- */
- for (;;)
- {
- r = WSASend(s, &wbuf, 1, &b, flags, NULL, NULL);
- if (r != SOCKET_ERROR && b > 0)
- /* Write succeeded right away */
- return b;
- if (r == SOCKET_ERROR &&
- WSAGetLastError() != WSAEWOULDBLOCK)
- {
- TranslateSocketError();
- return -1;
- }
- if (pgwin32_noblock)
- {
- /*
- * No data sent, and we are in "emulated non-blocking mode", so
- * return indicating that we'd block if we were to continue.
- */
- errno = EWOULDBLOCK;
- return -1;
- }
- /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */
- if (pgwin32_waitforsinglesocket(s, FD_WRITE | FD_CLOSE, INFINITE) == 0)
- return -1;
- }
- return -1;
- }
- /*
- * Wait for activity on one or more sockets.
- * While waiting, allow signals to run
- *
- * NOTE! Currently does not implement exceptfds check,
- * since it is not used in postgresql!
- */
- int
- pgwin32_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval * timeout)
- {
- WSAEVENT events[FD_SETSIZE * 2]; /* worst case is readfds totally
- * different from writefds, so
- * 2*FD_SETSIZE sockets */
- SOCKET sockets[FD_SETSIZE * 2];
- int numevents = 0;
- int i;
- int r;
- DWORD timeoutval = WSA_INFINITE;
- FD_SET outreadfds;
- FD_SET outwritefds;
- int nummatches = 0;
- Assert(exceptfds == NULL);
- if (pgwin32_poll_signals())
- return -1;
- FD_ZERO(&outreadfds);
- FD_ZERO(&outwritefds);
- /*
- * Write FDs are different in the way that it is only flagged by
- * WSASelectEvent() if we have tried to write to them first. So try an
- * empty write
- */
- if (writefds)
- {
- for (i = 0; i < writefds->fd_count; i++)
- {
- char c;
- WSABUF buf;
- DWORD sent;
- buf.buf = &c;
- buf.len = 0;
- r = WSASend(writefds->fd_array[i], &buf, 1, &sent, 0, NULL, NULL);
- if (r == 0) /* Completed - means things are fine! */
- FD_SET(writefds->fd_array[i], &outwritefds);
- else
- { /* Not completed */
- if (WSAGetLastError() != WSAEWOULDBLOCK)
- /*
- * Not completed, and not just "would block", so an error
- * occurred
- */
- FD_SET(writefds->fd_array[i], &outwritefds);
- }
- }
- if (outwritefds.fd_count > 0)
- {
- memcpy(writefds, &outwritefds, sizeof(fd_set));
- if (readfds)
- FD_ZERO(readfds);
- return outwritefds.fd_count;
- }
- }
- /* Now set up for an actual select */
- if (timeout != NULL)
- {
- /* timeoutval is in milliseconds */
- timeoutval = timeout->tv_sec * 1000 + timeout->tv_usec / 1000;
- }
- if (readfds != NULL)
- {
- for (i = 0; i < readfds->fd_count; i++)
- {
- events[numevents] = WSACreateEvent();
- sockets[numevents] = readfds->fd_array[i];
- numevents++;
- }
- }
- if (writefds != NULL)
- {
- for (i = 0; i < writefds->fd_count; i++)
- {
- if (!readfds ||
- !FD_ISSET(writefds->fd_array[i], readfds))
- {
- /* If the socket is not in the read list */
- events[numevents] = WSACreateEvent();
- sockets[numevents] = writefds->fd_array[i];
- numevents++;
- }
- }
- }
- for (i = 0; i < numevents; i++)
- {
- int flags = 0;
- if (readfds && FD_ISSET(sockets[i], readfds))
- flags |= FD_READ | FD_ACCEPT | FD_CLOSE;
- if (writefds && FD_ISSET(sockets[i], writefds))
- flags |= FD_WRITE | FD_CLOSE;
- if (WSAEventSelect(sockets[i], events[i], flags) != 0)
- {
- TranslateSocketError();
- /* release already-assigned event objects */
- while (--i >= 0)
- WSAEventSelect(sockets[i], NULL, 0);
- for (i = 0; i < numevents; i++)
- WSACloseEvent(events[i]);
- return -1;
- }
- }
- events[numevents] = pgwin32_signal_event;
- r = WaitForMultipleObjectsEx(numevents + 1, events, FALSE, timeoutval, TRUE);
- if (r != WAIT_TIMEOUT && r != WAIT_IO_COMPLETION && r != (WAIT_OBJECT_0 + numevents))
- {
- /*
- * We scan all events, even those not signalled, in case more than one
- * event has been tagged but Wait.. can only return one.
- */
- WSANETWORKEVENTS resEvents;
- for (i = 0; i < numevents; i++)
- {
- ZeroMemory(&resEvents, sizeof(resEvents));
- if (WSAEnumNetworkEvents(sockets[i], events[i], &resEvents) != 0)
- elog(ERROR, "failed to enumerate network events: error code %u",
- WSAGetLastError());
- /* Read activity? */
- if (readfds && FD_ISSET(sockets[i], readfds))
- {
- if ((resEvents.lNetworkEvents & FD_READ) ||
- (resEvents.lNetworkEvents & FD_ACCEPT) ||
- (resEvents.lNetworkEvents & FD_CLOSE))
- {
- FD_SET(sockets[i], &outreadfds);
- nummatches++;
- }
- }
- /* Write activity? */
- if (writefds && FD_ISSET(sockets[i], writefds))
- {
- if ((resEvents.lNetworkEvents & FD_WRITE) ||
- (resEvents.lNetworkEvents & FD_CLOSE))
- {
- FD_SET(sockets[i], &outwritefds);
- nummatches++;
- }
- }
- }
- }
- /* Clean up all the event objects */
- for (i = 0; i < numevents; i++)
- {
- WSAEventSelect(sockets[i], NULL, 0);
- WSACloseEvent(events[i]);
- }
- if (r == WSA_WAIT_TIMEOUT)
- {
- if (readfds)
- FD_ZERO(readfds);
- if (writefds)
- FD_ZERO(writefds);
- return 0;
- }
- if (r == WAIT_OBJECT_0 + numevents)
- {
- pgwin32_dispatch_queued_signals();
- errno = EINTR;
- if (readfds)
- FD_ZERO(readfds);
- if (writefds)
- FD_ZERO(writefds);
- return -1;
- }
- /* Overwrite socket sets with our resulting values */
- if (readfds)
- memcpy(readfds, &outreadfds, sizeof(fd_set));
- if (writefds)
- memcpy(writefds, &outwritefds, sizeof(fd_set));
- return nummatches;
- }
- /*
- * Return win32 error string, since strerror can't
- * handle winsock codes
- */
- static char wserrbuf[256];
- const char *
- pgwin32_socket_strerror(int err)
- {
- static HANDLE handleDLL = INVALID_HANDLE_VALUE;
- if (handleDLL == INVALID_HANDLE_VALUE)
- {
- handleDLL = LoadLibraryEx("netmsg.dll", NULL, DONT_RESOLVE_DLL_REFERENCES | LOAD_LIBRARY_AS_DATAFILE);
- if (handleDLL == NULL)
- ereport(FATAL,
- (errmsg_internal("could not load netmsg.dll: error code %lu", GetLastError())));
- }
- ZeroMemory(&wserrbuf, sizeof(wserrbuf));
- if (FormatMessage(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_FROM_HMODULE,
- handleDLL,
- err,
- MAKELANGID(LANG_ENGLISH, SUBLANG_DEFAULT),
- wserrbuf,
- sizeof(wserrbuf) - 1,
- NULL) == 0)
- {
- /* Failed to get id */
- sprintf(wserrbuf, "unrecognized winsock error %d", err);
- }
- return wserrbuf;
- }