PageRenderTime 22ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/src/backend/port/win32/socket.c

https://github.com/bbt123/postgres
C | 677 lines | 467 code | 84 blank | 126 comment | 129 complexity | 6fb12f3bdefbaf5d80a7d879e0387acc MD5 | raw file
Possible License(s): AGPL-3.0
  1. /*-------------------------------------------------------------------------
  2. *
  3. * socket.c
  4. * Microsoft Windows Win32 Socket Functions
  5. *
  6. * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
  7. *
  8. * IDENTIFICATION
  9. * src/backend/port/win32/socket.c
  10. *
  11. *-------------------------------------------------------------------------
  12. */
  13. #include "postgres.h"
/*
 * Indicate if pgwin32_recv() and pgwin32_send() should operate
 * in non-blocking mode.
 *
 * Since the socket emulation layer always sets the actual socket to
 * non-blocking mode in order to be able to deliver signals, we must
 * specify this in a separate flag if we actually need non-blocking
 * operation.
 *
 * While the flag is nonzero, pgwin32_recv() and pgwin32_send() return -1
 * with errno set to EWOULDBLOCK instead of waiting when the operation
 * cannot complete immediately.
 *
 * This flag changes the behaviour *globally* for all socket operations,
 * so it should only be set for very short periods of time.
 */
int pgwin32_noblock = 0;
  27. #undef socket
  28. #undef accept
  29. #undef connect
  30. #undef select
  31. #undef recv
  32. #undef send
  33. /*
  34. * Blocking socket functions implemented so they listen on both
  35. * the socket and the signal event, required for signal handling.
  36. */
  37. /*
  38. * Convert the last socket error code into errno
  39. */
  40. static void
  41. TranslateSocketError(void)
  42. {
  43. switch (WSAGetLastError())
  44. {
  45. case WSANOTINITIALISED:
  46. case WSAENETDOWN:
  47. case WSAEINPROGRESS:
  48. case WSAEINVAL:
  49. case WSAESOCKTNOSUPPORT:
  50. case WSAEFAULT:
  51. case WSAEINVALIDPROVIDER:
  52. case WSAEINVALIDPROCTABLE:
  53. case WSAEMSGSIZE:
  54. errno = EINVAL;
  55. break;
  56. case WSAEAFNOSUPPORT:
  57. errno = EAFNOSUPPORT;
  58. break;
  59. case WSAEMFILE:
  60. errno = EMFILE;
  61. break;
  62. case WSAENOBUFS:
  63. errno = ENOBUFS;
  64. break;
  65. case WSAEPROTONOSUPPORT:
  66. case WSAEPROTOTYPE:
  67. errno = EPROTONOSUPPORT;
  68. break;
  69. case WSAECONNREFUSED:
  70. errno = ECONNREFUSED;
  71. break;
  72. case WSAEINTR:
  73. errno = EINTR;
  74. break;
  75. case WSAENOTSOCK:
  76. errno = EBADFD;
  77. break;
  78. case WSAEOPNOTSUPP:
  79. errno = EOPNOTSUPP;
  80. break;
  81. case WSAEWOULDBLOCK:
  82. errno = EWOULDBLOCK;
  83. break;
  84. case WSAEACCES:
  85. errno = EACCES;
  86. break;
  87. case WSAENOTCONN:
  88. case WSAENETRESET:
  89. case WSAECONNRESET:
  90. case WSAESHUTDOWN:
  91. case WSAECONNABORTED:
  92. case WSAEDISCON:
  93. errno = ECONNREFUSED; /* ENOTCONN? */
  94. break;
  95. default:
  96. ereport(NOTICE,
  97. (errmsg_internal("unrecognized win32 socket error code: %d", WSAGetLastError())));
  98. errno = EINVAL;
  99. }
  100. }
  101. static int
  102. pgwin32_poll_signals(void)
  103. {
  104. if (UNBLOCKED_SIGNAL_QUEUE())
  105. {
  106. pgwin32_dispatch_queued_signals();
  107. errno = EINTR;
  108. return 1;
  109. }
  110. return 0;
  111. }
  112. static int
  113. isDataGram(SOCKET s)
  114. {
  115. int type;
  116. int typelen = sizeof(type);
  117. if (getsockopt(s, SOL_SOCKET, SO_TYPE, (char *) &type, &typelen))
  118. return 1;
  119. return (type == SOCK_DGRAM) ? 1 : 0;
  120. }
  121. int
  122. pgwin32_waitforsinglesocket(SOCKET s, int what, int timeout)
  123. {
  124. static HANDLE waitevent = INVALID_HANDLE_VALUE;
  125. static SOCKET current_socket = INVALID_SOCKET;
  126. static int isUDP = 0;
  127. HANDLE events[2];
  128. int r;
  129. /* Create an event object just once and use it on all future calls */
  130. if (waitevent == INVALID_HANDLE_VALUE)
  131. {
  132. waitevent = CreateEvent(NULL, TRUE, FALSE, NULL);
  133. if (waitevent == INVALID_HANDLE_VALUE)
  134. ereport(ERROR,
  135. (errmsg_internal("could not create socket waiting event: error code %lu", GetLastError())));
  136. }
  137. else if (!ResetEvent(waitevent))
  138. ereport(ERROR,
  139. (errmsg_internal("could not reset socket waiting event: error code %lu", GetLastError())));
  140. /*
  141. * Track whether socket is UDP or not. (NB: most likely, this is both
  142. * useless and wrong; there is no reason to think that the behavior of
  143. * WSAEventSelect is different for TCP and UDP.)
  144. */
  145. if (current_socket != s)
  146. isUDP = isDataGram(s);
  147. current_socket = s;
  148. /*
  149. * Attach event to socket. NOTE: we must detach it again before
  150. * returning, since other bits of code may try to attach other events to
  151. * the socket.
  152. */
  153. if (WSAEventSelect(s, waitevent, what) != 0)
  154. {
  155. TranslateSocketError();
  156. return 0;
  157. }
  158. events[0] = pgwin32_signal_event;
  159. events[1] = waitevent;
  160. /*
  161. * Just a workaround of unknown locking problem with writing in UDP socket
  162. * under high load: Client's pgsql backend sleeps infinitely in
  163. * WaitForMultipleObjectsEx, pgstat process sleeps in pgwin32_select().
  164. * So, we will wait with small timeout(0.1 sec) and if socket is still
  165. * blocked, try WSASend (see comments in pgwin32_select) and wait again.
  166. */
  167. if ((what & FD_WRITE) && isUDP)
  168. {
  169. for (;;)
  170. {
  171. r = WaitForMultipleObjectsEx(2, events, FALSE, 100, TRUE);
  172. if (r == WAIT_TIMEOUT)
  173. {
  174. char c;
  175. WSABUF buf;
  176. DWORD sent;
  177. buf.buf = &c;
  178. buf.len = 0;
  179. r = WSASend(s, &buf, 1, &sent, 0, NULL, NULL);
  180. if (r == 0) /* Completed - means things are fine! */
  181. {
  182. WSAEventSelect(s, NULL, 0);
  183. return 1;
  184. }
  185. else if (WSAGetLastError() != WSAEWOULDBLOCK)
  186. {
  187. TranslateSocketError();
  188. WSAEventSelect(s, NULL, 0);
  189. return 0;
  190. }
  191. }
  192. else
  193. break;
  194. }
  195. }
  196. else
  197. r = WaitForMultipleObjectsEx(2, events, FALSE, timeout, TRUE);
  198. WSAEventSelect(s, NULL, 0);
  199. if (r == WAIT_OBJECT_0 || r == WAIT_IO_COMPLETION)
  200. {
  201. pgwin32_dispatch_queued_signals();
  202. errno = EINTR;
  203. return 0;
  204. }
  205. if (r == WAIT_OBJECT_0 + 1)
  206. return 1;
  207. if (r == WAIT_TIMEOUT)
  208. {
  209. errno = EWOULDBLOCK;
  210. return 0;
  211. }
  212. ereport(ERROR,
  213. (errmsg_internal("unrecognized return value from WaitForMultipleObjects: %d (error code %lu)", r, GetLastError())));
  214. return 0;
  215. }
  216. /*
  217. * Create a socket, setting it to overlapped and non-blocking
  218. */
  219. SOCKET
  220. pgwin32_socket(int af, int type, int protocol)
  221. {
  222. SOCKET s;
  223. unsigned long on = 1;
  224. s = WSASocket(af, type, protocol, NULL, 0, WSA_FLAG_OVERLAPPED);
  225. if (s == INVALID_SOCKET)
  226. {
  227. TranslateSocketError();
  228. return INVALID_SOCKET;
  229. }
  230. if (ioctlsocket(s, FIONBIO, &on))
  231. {
  232. TranslateSocketError();
  233. return INVALID_SOCKET;
  234. }
  235. errno = 0;
  236. return s;
  237. }
  238. SOCKET
  239. pgwin32_accept(SOCKET s, struct sockaddr * addr, int *addrlen)
  240. {
  241. SOCKET rs;
  242. /*
  243. * Poll for signals, but don't return with EINTR, since we don't handle
  244. * that in pqcomm.c
  245. */
  246. pgwin32_poll_signals();
  247. rs = WSAAccept(s, addr, addrlen, NULL, 0);
  248. if (rs == INVALID_SOCKET)
  249. {
  250. TranslateSocketError();
  251. return INVALID_SOCKET;
  252. }
  253. return rs;
  254. }
  255. /* No signal delivery during connect. */
  256. int
  257. pgwin32_connect(SOCKET s, const struct sockaddr * addr, int addrlen)
  258. {
  259. int r;
  260. r = WSAConnect(s, addr, addrlen, NULL, NULL, NULL, NULL);
  261. if (r == 0)
  262. return 0;
  263. if (WSAGetLastError() != WSAEWOULDBLOCK)
  264. {
  265. TranslateSocketError();
  266. return -1;
  267. }
  268. while (pgwin32_waitforsinglesocket(s, FD_CONNECT, INFINITE) == 0)
  269. {
  270. /* Loop endlessly as long as we are just delivering signals */
  271. }
  272. return 0;
  273. }
  274. int
  275. pgwin32_recv(SOCKET s, char *buf, int len, int f)
  276. {
  277. WSABUF wbuf;
  278. int r;
  279. DWORD b;
  280. DWORD flags = f;
  281. int n;
  282. if (pgwin32_poll_signals())
  283. return -1;
  284. wbuf.len = len;
  285. wbuf.buf = buf;
  286. r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
  287. if (r != SOCKET_ERROR && b > 0)
  288. /* Read succeeded right away */
  289. return b;
  290. if (r == SOCKET_ERROR &&
  291. WSAGetLastError() != WSAEWOULDBLOCK)
  292. {
  293. TranslateSocketError();
  294. return -1;
  295. }
  296. if (pgwin32_noblock)
  297. {
  298. /*
  299. * No data received, and we are in "emulated non-blocking mode", so
  300. * return indicating that we'd block if we were to continue.
  301. */
  302. errno = EWOULDBLOCK;
  303. return -1;
  304. }
  305. /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */
  306. for (n = 0; n < 5; n++)
  307. {
  308. if (pgwin32_waitforsinglesocket(s, FD_READ | FD_CLOSE | FD_ACCEPT,
  309. INFINITE) == 0)
  310. return -1; /* errno already set */
  311. r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
  312. if (r == SOCKET_ERROR)
  313. {
  314. if (WSAGetLastError() == WSAEWOULDBLOCK)
  315. {
  316. /*
  317. * There seem to be cases on win2k (at least) where WSARecv
  318. * can return WSAEWOULDBLOCK even when
  319. * pgwin32_waitforsinglesocket claims the socket is readable.
  320. * In this case, just sleep for a moment and try again. We try
  321. * up to 5 times - if it fails more than that it's not likely
  322. * to ever come back.
  323. */
  324. pg_usleep(10000);
  325. continue;
  326. }
  327. TranslateSocketError();
  328. return -1;
  329. }
  330. return b;
  331. }
  332. ereport(NOTICE,
  333. (errmsg_internal("could not read from ready socket (after retries)")));
  334. errno = EWOULDBLOCK;
  335. return -1;
  336. }
  337. /*
  338. * The second argument to send() is defined by SUS to be a "const void *"
  339. * and so we use the same signature here to keep compilers happy when
  340. * handling callers.
  341. *
  342. * But the buf member of a WSABUF struct is defined as "char *", so we cast
  343. * the second argument to that here when assigning it, also to keep compilers
  344. * happy.
  345. */
  346. int
  347. pgwin32_send(SOCKET s, const void *buf, int len, int flags)
  348. {
  349. WSABUF wbuf;
  350. int r;
  351. DWORD b;
  352. if (pgwin32_poll_signals())
  353. return -1;
  354. wbuf.len = len;
  355. wbuf.buf = (char *) buf;
  356. /*
  357. * Readiness of socket to send data to UDP socket may be not true: socket
  358. * can become busy again! So loop until send or error occurs.
  359. */
  360. for (;;)
  361. {
  362. r = WSASend(s, &wbuf, 1, &b, flags, NULL, NULL);
  363. if (r != SOCKET_ERROR && b > 0)
  364. /* Write succeeded right away */
  365. return b;
  366. if (r == SOCKET_ERROR &&
  367. WSAGetLastError() != WSAEWOULDBLOCK)
  368. {
  369. TranslateSocketError();
  370. return -1;
  371. }
  372. if (pgwin32_noblock)
  373. {
  374. /*
  375. * No data sent, and we are in "emulated non-blocking mode", so
  376. * return indicating that we'd block if we were to continue.
  377. */
  378. errno = EWOULDBLOCK;
  379. return -1;
  380. }
  381. /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */
  382. if (pgwin32_waitforsinglesocket(s, FD_WRITE | FD_CLOSE, INFINITE) == 0)
  383. return -1;
  384. }
  385. return -1;
  386. }
  387. /*
  388. * Wait for activity on one or more sockets.
  389. * While waiting, allow signals to run
  390. *
  391. * NOTE! Currently does not implement exceptfds check,
  392. * since it is not used in postgresql!
  393. */
  394. int
  395. pgwin32_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval * timeout)
  396. {
  397. WSAEVENT events[FD_SETSIZE * 2]; /* worst case is readfds totally
  398. * different from writefds, so
  399. * 2*FD_SETSIZE sockets */
  400. SOCKET sockets[FD_SETSIZE * 2];
  401. int numevents = 0;
  402. int i;
  403. int r;
  404. DWORD timeoutval = WSA_INFINITE;
  405. FD_SET outreadfds;
  406. FD_SET outwritefds;
  407. int nummatches = 0;
  408. Assert(exceptfds == NULL);
  409. if (pgwin32_poll_signals())
  410. return -1;
  411. FD_ZERO(&outreadfds);
  412. FD_ZERO(&outwritefds);
  413. /*
  414. * Write FDs are different in the way that it is only flagged by
  415. * WSASelectEvent() if we have tried to write to them first. So try an
  416. * empty write
  417. */
  418. if (writefds)
  419. {
  420. for (i = 0; i < writefds->fd_count; i++)
  421. {
  422. char c;
  423. WSABUF buf;
  424. DWORD sent;
  425. buf.buf = &c;
  426. buf.len = 0;
  427. r = WSASend(writefds->fd_array[i], &buf, 1, &sent, 0, NULL, NULL);
  428. if (r == 0) /* Completed - means things are fine! */
  429. FD_SET(writefds->fd_array[i], &outwritefds);
  430. else
  431. { /* Not completed */
  432. if (WSAGetLastError() != WSAEWOULDBLOCK)
  433. /*
  434. * Not completed, and not just "would block", so an error
  435. * occurred
  436. */
  437. FD_SET(writefds->fd_array[i], &outwritefds);
  438. }
  439. }
  440. if (outwritefds.fd_count > 0)
  441. {
  442. memcpy(writefds, &outwritefds, sizeof(fd_set));
  443. if (readfds)
  444. FD_ZERO(readfds);
  445. return outwritefds.fd_count;
  446. }
  447. }
  448. /* Now set up for an actual select */
  449. if (timeout != NULL)
  450. {
  451. /* timeoutval is in milliseconds */
  452. timeoutval = timeout->tv_sec * 1000 + timeout->tv_usec / 1000;
  453. }
  454. if (readfds != NULL)
  455. {
  456. for (i = 0; i < readfds->fd_count; i++)
  457. {
  458. events[numevents] = WSACreateEvent();
  459. sockets[numevents] = readfds->fd_array[i];
  460. numevents++;
  461. }
  462. }
  463. if (writefds != NULL)
  464. {
  465. for (i = 0; i < writefds->fd_count; i++)
  466. {
  467. if (!readfds ||
  468. !FD_ISSET(writefds->fd_array[i], readfds))
  469. {
  470. /* If the socket is not in the read list */
  471. events[numevents] = WSACreateEvent();
  472. sockets[numevents] = writefds->fd_array[i];
  473. numevents++;
  474. }
  475. }
  476. }
  477. for (i = 0; i < numevents; i++)
  478. {
  479. int flags = 0;
  480. if (readfds && FD_ISSET(sockets[i], readfds))
  481. flags |= FD_READ | FD_ACCEPT | FD_CLOSE;
  482. if (writefds && FD_ISSET(sockets[i], writefds))
  483. flags |= FD_WRITE | FD_CLOSE;
  484. if (WSAEventSelect(sockets[i], events[i], flags) != 0)
  485. {
  486. TranslateSocketError();
  487. /* release already-assigned event objects */
  488. while (--i >= 0)
  489. WSAEventSelect(sockets[i], NULL, 0);
  490. for (i = 0; i < numevents; i++)
  491. WSACloseEvent(events[i]);
  492. return -1;
  493. }
  494. }
  495. events[numevents] = pgwin32_signal_event;
  496. r = WaitForMultipleObjectsEx(numevents + 1, events, FALSE, timeoutval, TRUE);
  497. if (r != WAIT_TIMEOUT && r != WAIT_IO_COMPLETION && r != (WAIT_OBJECT_0 + numevents))
  498. {
  499. /*
  500. * We scan all events, even those not signalled, in case more than one
  501. * event has been tagged but Wait.. can only return one.
  502. */
  503. WSANETWORKEVENTS resEvents;
  504. for (i = 0; i < numevents; i++)
  505. {
  506. ZeroMemory(&resEvents, sizeof(resEvents));
  507. if (WSAEnumNetworkEvents(sockets[i], events[i], &resEvents) != 0)
  508. elog(ERROR, "failed to enumerate network events: error code %u",
  509. WSAGetLastError());
  510. /* Read activity? */
  511. if (readfds && FD_ISSET(sockets[i], readfds))
  512. {
  513. if ((resEvents.lNetworkEvents & FD_READ) ||
  514. (resEvents.lNetworkEvents & FD_ACCEPT) ||
  515. (resEvents.lNetworkEvents & FD_CLOSE))
  516. {
  517. FD_SET(sockets[i], &outreadfds);
  518. nummatches++;
  519. }
  520. }
  521. /* Write activity? */
  522. if (writefds && FD_ISSET(sockets[i], writefds))
  523. {
  524. if ((resEvents.lNetworkEvents & FD_WRITE) ||
  525. (resEvents.lNetworkEvents & FD_CLOSE))
  526. {
  527. FD_SET(sockets[i], &outwritefds);
  528. nummatches++;
  529. }
  530. }
  531. }
  532. }
  533. /* Clean up all the event objects */
  534. for (i = 0; i < numevents; i++)
  535. {
  536. WSAEventSelect(sockets[i], NULL, 0);
  537. WSACloseEvent(events[i]);
  538. }
  539. if (r == WSA_WAIT_TIMEOUT)
  540. {
  541. if (readfds)
  542. FD_ZERO(readfds);
  543. if (writefds)
  544. FD_ZERO(writefds);
  545. return 0;
  546. }
  547. if (r == WAIT_OBJECT_0 + numevents)
  548. {
  549. pgwin32_dispatch_queued_signals();
  550. errno = EINTR;
  551. if (readfds)
  552. FD_ZERO(readfds);
  553. if (writefds)
  554. FD_ZERO(writefds);
  555. return -1;
  556. }
  557. /* Overwrite socket sets with our resulting values */
  558. if (readfds)
  559. memcpy(readfds, &outreadfds, sizeof(fd_set));
  560. if (writefds)
  561. memcpy(writefds, &outwritefds, sizeof(fd_set));
  562. return nummatches;
  563. }
  564. /*
  565. * Return win32 error string, since strerror can't
  566. * handle winsock codes
  567. */
  568. static char wserrbuf[256];
  569. const char *
  570. pgwin32_socket_strerror(int err)
  571. {
  572. static HANDLE handleDLL = INVALID_HANDLE_VALUE;
  573. if (handleDLL == INVALID_HANDLE_VALUE)
  574. {
  575. handleDLL = LoadLibraryEx("netmsg.dll", NULL, DONT_RESOLVE_DLL_REFERENCES | LOAD_LIBRARY_AS_DATAFILE);
  576. if (handleDLL == NULL)
  577. ereport(FATAL,
  578. (errmsg_internal("could not load netmsg.dll: error code %lu", GetLastError())));
  579. }
  580. ZeroMemory(&wserrbuf, sizeof(wserrbuf));
  581. if (FormatMessage(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_FROM_HMODULE,
  582. handleDLL,
  583. err,
  584. MAKELANGID(LANG_ENGLISH, SUBLANG_DEFAULT),
  585. wserrbuf,
  586. sizeof(wserrbuf) - 1,
  587. NULL) == 0)
  588. {
  589. /* Failed to get id */
  590. sprintf(wserrbuf, "unrecognized winsock error %d", err);
  591. }
  592. return wserrbuf;
  593. }