PageRenderTime 30ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/xlators/protocol/legacy/transport/socket/src/socket.c

https://github.com/dopry/glusterfs
C | 1625 lines | 1242 code | 329 blank | 54 comment | 232 complexity | fcb24a73f68c2a2e6e846c0ade11f687 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.0, GPL-2.0, Apache-2.0, BSD-3-Clause
  1. /*
  2. Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
  3. This file is part of GlusterFS.
  4. GlusterFS is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published
  6. by the Free Software Foundation; either version 3 of the License,
  7. or (at your option) any later version.
  8. GlusterFS is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program. If not, see
  14. <http://www.gnu.org/licenses/>.
  15. */
  16. #ifndef _CONFIG_H
  17. #define _CONFIG_H
  18. #include "config.h"
  19. #endif
  20. #include "socket.h"
  21. #include "name.h"
  22. #include "dict.h"
  23. #include "transport.h"
  24. #include "logging.h"
  25. #include "xlator.h"
  26. #include "byte-order.h"
  27. #include "common-utils.h"
  28. #include "compat-errno.h"
  29. #include <fcntl.h>
  30. #include <errno.h>
  31. #include <netinet/tcp.h>
  32. #define GF_LOG_ERRNO(errno) ((errno == ENOTCONN) ? GF_LOG_DEBUG : GF_LOG_ERROR)
  33. #define SA(ptr) ((struct sockaddr *)ptr)
  34. static int socket_init (transport_t *this);
  35. /*
  36. * return value:
  37. * 0 = success (completed)
  38. * -1 = error
  39. * > 0 = incomplete
  40. */
  41. static int
  42. __socket_rwv (transport_t *this, struct iovec *vector, int count,
  43. struct iovec **pending_vector, int *pending_count,
  44. int write)
  45. {
  46. socket_private_t *priv = NULL;
  47. int sock = -1;
  48. int ret = -1;
  49. struct iovec *opvector = NULL;
  50. int opcount = 0;
  51. int moved = 0;
  52. priv = this->private;
  53. sock = priv->sock;
  54. opvector = vector;
  55. opcount = count;
  56. while (opcount) {
  57. if (write) {
  58. ret = writev (sock, opvector, opcount);
  59. if (ret == 0 || (ret == -1 && errno == EAGAIN)) {
  60. /* done for now */
  61. break;
  62. }
  63. } else {
  64. ret = readv (sock, opvector, opcount);
  65. if (ret == -1 && errno == EAGAIN) {
  66. /* done for now */
  67. break;
  68. }
  69. }
  70. if (ret == 0) {
  71. /* Mostly due to 'umount' in client */
  72. gf_log (this->xl->name, GF_LOG_TRACE,
  73. "EOF from peer %s", this->peerinfo.identifier);
  74. opcount = -1;
  75. errno = ENOTCONN;
  76. break;
  77. }
  78. if (ret == -1) {
  79. if (errno == EINTR)
  80. continue;
  81. gf_log (this->xl->name, GF_LOG_TRACE,
  82. "%s failed (%s)", write ? "writev" : "readv",
  83. strerror (errno));
  84. opcount = -1;
  85. break;
  86. }
  87. moved = 0;
  88. while (moved < ret) {
  89. if ((ret - moved) >= opvector[0].iov_len) {
  90. moved += opvector[0].iov_len;
  91. opvector++;
  92. opcount--;
  93. } else {
  94. opvector[0].iov_len -= (ret - moved);
  95. opvector[0].iov_base += (ret - moved);
  96. moved += (ret - moved);
  97. }
  98. while (opcount && !opvector[0].iov_len) {
  99. opvector++;
  100. opcount--;
  101. }
  102. }
  103. }
  104. if (pending_vector)
  105. *pending_vector = opvector;
  106. if (pending_count)
  107. *pending_count = opcount;
  108. return opcount;
  109. }
  110. static int
  111. __socket_readv (transport_t *this, struct iovec *vector, int count,
  112. struct iovec **pending_vector, int *pending_count)
  113. {
  114. int ret = -1;
  115. ret = __socket_rwv (this, vector, count,
  116. pending_vector, pending_count, 0);
  117. return ret;
  118. }
  119. static int
  120. __socket_writev (transport_t *this, struct iovec *vector, int count,
  121. struct iovec **pending_vector, int *pending_count)
  122. {
  123. int ret = -1;
  124. ret = __socket_rwv (this, vector, count,
  125. pending_vector, pending_count, 1);
  126. return ret;
  127. }
  128. static int
  129. __socket_disconnect (transport_t *this)
  130. {
  131. socket_private_t *priv = NULL;
  132. int ret = -1;
  133. priv = this->private;
  134. if (priv->sock != -1) {
  135. ret = shutdown (priv->sock, SHUT_RDWR);
  136. priv->connected = -1;
  137. gf_log (this->xl->name, GF_LOG_TRACE,
  138. "shutdown() returned %d. set connection state to -1",
  139. ret);
  140. }
  141. return ret;
  142. }
  143. static int
  144. __socket_server_bind (transport_t *this)
  145. {
  146. socket_private_t *priv = NULL;
  147. int ret = -1;
  148. int opt = 1;
  149. priv = this->private;
  150. ret = setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR,
  151. &opt, sizeof (opt));
  152. if (ret == -1) {
  153. gf_log (this->xl->name, GF_LOG_ERROR,
  154. "setsockopt() for SO_REUSEADDR failed (%s)",
  155. strerror (errno));
  156. }
  157. ret = bind (priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
  158. this->myinfo.sockaddr_len);
  159. if (ret == -1) {
  160. gf_log (this->xl->name, GF_LOG_ERROR,
  161. "binding to %s failed: %s",
  162. this->myinfo.identifier, strerror (errno));
  163. if (errno == EADDRINUSE) {
  164. gf_log (this->xl->name, GF_LOG_ERROR,
  165. "Port is already in use");
  166. }
  167. }
  168. return ret;
  169. }
  /*
   * Switch a descriptor to non-blocking mode by adding O_NONBLOCK to its
   * current file status flags.
   *
   * Returns -1 if the flags cannot be read, otherwise the result of the
   * F_SETFL call.
   */
  static int
  __socket_nonblock (int fd)
  {
          int current = fcntl (fd, F_GETFL);

          if (current == -1)
                  return -1;

          return fcntl (fd, F_SETFL, current | O_NONBLOCK);
  }
  180. static int
  181. __socket_nodelay (int fd)
  182. {
  183. int on = 1;
  184. int ret = -1;
  185. ret = setsockopt (fd, IPPROTO_TCP, TCP_NODELAY,
  186. &on, sizeof (on));
  187. if (!ret)
  188. gf_log ("", GF_LOG_TRACE,
  189. "NODELAY enabled for socket %d", fd);
  190. return ret;
  191. }
  192. static int
  193. __socket_keepalive (int fd, int keepalive_intvl)
  194. {
  195. int on = 1;
  196. int ret = -1;
  197. ret = setsockopt (fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof (on));
  198. if (ret == -1)
  199. goto err;
  200. if (keepalive_intvl == GF_USE_DEFAULT_KEEPALIVE)
  201. goto done;
  202. #ifndef GF_LINUX_HOST_OS
  203. ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepalive_intvl,
  204. sizeof (keepalive_intvl));
  205. if (ret == -1)
  206. goto err;
  207. #else
  208. ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepalive_intvl,
  209. sizeof (keepalive_intvl));
  210. if (ret == -1)
  211. goto err;
  212. ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepalive_intvl,
  213. sizeof (keepalive_intvl));
  214. if (ret == -1)
  215. goto err;
  216. #endif
  217. done:
  218. gf_log ("", GF_LOG_TRACE, "Keep-alive enabled for socket %d, interval "
  219. "%d", fd, keepalive_intvl);
  220. err:
  221. return ret;
  222. }
  /*
   * Query the pending error (SO_ERROR) of a socket.
   *
   * Returns 0 when the socket reports no error. When an error is pending it
   * is stored in errno and -1 is returned. If getsockopt() itself fails its
   * result is returned unchanged.
   */
  static int
  __socket_connect_finish (int fd)
  {
          int       pending = 0;
          socklen_t len     = sizeof (int);
          int       rc      = -1;

          rc = getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&pending, &len);
          if (rc != 0)
                  return rc;

          if (pending != 0) {
                  errno = pending;
                  return -1;
          }

          return 0;
  }
  236. static void
  237. __socket_reset (transport_t *this)
  238. {
  239. socket_private_t *priv = NULL;
  240. priv = this->private;
  241. /* TODO: use mem-pool on incoming data */
  242. if (priv->incoming.hdr_p)
  243. GF_FREE (priv->incoming.hdr_p);
  244. if (priv->incoming.iobuf)
  245. iobuf_unref (priv->incoming.iobuf);
  246. memset (&priv->incoming, 0, sizeof (priv->incoming));
  247. event_unregister (this->xl->ctx->event_pool, priv->sock, priv->idx);
  248. close (priv->sock);
  249. priv->sock = -1;
  250. priv->idx = -1;
  251. priv->connected = -1;
  252. }
  253. static struct ioq *
  254. __socket_ioq_new (transport_t *this, char *buf, int len,
  255. struct iovec *vector, int count, struct iobref *iobref)
  256. {
  257. socket_private_t *priv = NULL;
  258. struct ioq *entry = NULL;
  259. priv = this->private;
  260. /* TODO: use mem-pool */
  261. entry = GF_CALLOC (1, sizeof (*entry),
  262. gf_common_mt_ioq);
  263. if (!entry)
  264. return NULL;
  265. GF_ASSERT (count <= (MAX_IOVEC-2));
  266. entry->header.colonO[0] = ':';
  267. entry->header.colonO[1] = 'O';
  268. entry->header.colonO[2] = '\0';
  269. entry->header.version = 42;
  270. entry->header.size1 = hton32 (len);
  271. entry->header.size2 = hton32 (iov_length (vector, count));
  272. entry->vector[0].iov_base = &entry->header;
  273. entry->vector[0].iov_len = sizeof (entry->header);
  274. entry->count++;
  275. entry->vector[1].iov_base = buf;
  276. entry->vector[1].iov_len = len;
  277. entry->count++;
  278. if (vector && count) {
  279. memcpy (&entry->vector[2], vector, sizeof (*vector) * count);
  280. entry->count += count;
  281. }
  282. entry->pending_vector = entry->vector;
  283. entry->pending_count = entry->count;
  284. if (iobref)
  285. entry->iobref = iobref_ref (iobref);
  286. entry->buf = buf;
  287. INIT_LIST_HEAD (&entry->list);
  288. return entry;
  289. }
  290. static void
  291. __socket_ioq_entry_free (struct ioq *entry)
  292. {
  293. list_del_init (&entry->list);
  294. if (entry->iobref)
  295. iobref_unref (entry->iobref);
  296. /* TODO: use mem-pool */
  297. GF_FREE (entry->buf);
  298. /* TODO: use mem-pool */
  299. GF_FREE (entry);
  300. }
  301. static void
  302. __socket_ioq_flush (transport_t *this)
  303. {
  304. socket_private_t *priv = NULL;
  305. struct ioq *entry = NULL;
  306. priv = this->private;
  307. while (!list_empty (&priv->ioq)) {
  308. entry = priv->ioq_next;
  309. __socket_ioq_entry_free (entry);
  310. }
  311. return;
  312. }
  313. static int
  314. __socket_ioq_churn_entry (transport_t *this, struct ioq *entry)
  315. {
  316. int ret = -1;
  317. ret = __socket_writev (this, entry->pending_vector,
  318. entry->pending_count,
  319. &entry->pending_vector,
  320. &entry->pending_count);
  321. if (ret == 0) {
  322. /* current entry was completely written */
  323. GF_ASSERT (entry->pending_count == 0);
  324. __socket_ioq_entry_free (entry);
  325. }
  326. return ret;
  327. }
  328. static int
  329. __socket_ioq_churn (transport_t *this)
  330. {
  331. socket_private_t *priv = NULL;
  332. int ret = 0;
  333. struct ioq *entry = NULL;
  334. priv = this->private;
  335. while (!list_empty (&priv->ioq)) {
  336. /* pick next entry */
  337. entry = priv->ioq_next;
  338. ret = __socket_ioq_churn_entry (this, entry);
  339. if (ret != 0)
  340. break;
  341. }
  342. if (list_empty (&priv->ioq)) {
  343. /* all pending writes done, not interested in POLLOUT */
  344. priv->idx = event_select_on (this->xl->ctx->event_pool,
  345. priv->sock, priv->idx, -1, 0);
  346. }
  347. return ret;
  348. }
  349. static int
  350. socket_event_poll_err (transport_t *this)
  351. {
  352. socket_private_t *priv = NULL;
  353. int ret = -1;
  354. priv = this->private;
  355. pthread_mutex_lock (&priv->lock);
  356. {
  357. __socket_ioq_flush (this);
  358. __socket_reset (this);
  359. }
  360. pthread_mutex_unlock (&priv->lock);
  361. xlator_notify (this->xl, GF_EVENT_POLLERR, this);
  362. return ret;
  363. }
  364. static int
  365. socket_event_poll_out (transport_t *this)
  366. {
  367. socket_private_t *priv = NULL;
  368. int ret = -1;
  369. priv = this->private;
  370. pthread_mutex_lock (&priv->lock);
  371. {
  372. if (priv->connected == 1) {
  373. ret = __socket_ioq_churn (this);
  374. if (ret == -1) {
  375. __socket_disconnect (this);
  376. }
  377. }
  378. }
  379. pthread_mutex_unlock (&priv->lock);
  380. xlator_notify (this->xl, GF_EVENT_POLLOUT, this);
  381. return ret;
  382. }
  383. static int
  384. __socket_proto_validate_header (transport_t *this,
  385. struct socket_header *header,
  386. size_t *size1_p, size_t *size2_p)
  387. {
  388. size_t size1 = 0;
  389. size_t size2 = 0;
  390. if (strcmp (header->colonO, ":O")) {
  391. gf_log (this->xl->name, GF_LOG_DEBUG,
  392. "socket header signature does not match :O (%x.%x.%x)",
  393. header->colonO[0], header->colonO[1],
  394. header->colonO[2]);
  395. return -1;
  396. }
  397. if (header->version != 42) {
  398. gf_log (this->xl->name, GF_LOG_DEBUG,
  399. "socket header version does not match 42 != %d",
  400. header->version);
  401. return -1;
  402. }
  403. size1 = ntoh32 (header->size1);
  404. size2 = ntoh32 (header->size2);
  405. if (size1 <= 0 || size1 > 1048576) {
  406. gf_log (this->xl->name, GF_LOG_DEBUG,
  407. "socket header has incorrect size1=%"GF_PRI_SIZET,
  408. size1);
  409. return -1;
  410. }
  411. if (size2 > (131072)) {
  412. gf_log (this->xl->name, GF_LOG_DEBUG,
  413. "socket header has incorrect size2=%"GF_PRI_SIZET,
  414. size2);
  415. return -1;
  416. }
  417. if (size1_p)
  418. *size1_p = size1;
  419. if (size2_p)
  420. *size2_p = size2;
  421. return 0;
  422. }
  423. /* socket protocol state machine */
  424. static int
  425. __socket_proto_state_machine (transport_t *this)
  426. {
  427. int ret = -1;
  428. socket_private_t *priv = NULL;
  429. size_t size1 = 0;
  430. size_t size2 = 0;
  431. int previous_state = -1;
  432. struct socket_header *hdr = NULL;
  433. struct iobuf *iobuf = NULL;
  434. priv = this->private;
  435. while (priv->incoming.state != SOCKET_PROTO_STATE_COMPLETE) {
  436. /* debug check against infinite loops */
  437. if (previous_state == priv->incoming.state) {
  438. gf_log (this->xl->name, GF_LOG_DEBUG,
  439. "state did not change! (%d) breaking",
  440. previous_state);
  441. ret = -1;
  442. goto unlock;
  443. }
  444. previous_state = priv->incoming.state;
  445. switch (priv->incoming.state) {
  446. case SOCKET_PROTO_STATE_NADA:
  447. priv->incoming.pending_vector =
  448. priv->incoming.vector;
  449. priv->incoming.pending_vector->iov_base =
  450. &priv->incoming.header;
  451. priv->incoming.pending_vector->iov_len =
  452. sizeof (struct socket_header);
  453. priv->incoming.state =
  454. SOCKET_PROTO_STATE_HEADER_COMING;
  455. break;
  456. case SOCKET_PROTO_STATE_HEADER_COMING:
  457. ret = __socket_readv (this,
  458. priv->incoming.pending_vector, 1,
  459. &priv->incoming.pending_vector,
  460. NULL);
  461. if (ret == 0) {
  462. priv->incoming.state =
  463. SOCKET_PROTO_STATE_HEADER_CAME;
  464. break;
  465. }
  466. if (ret == -1) {
  467. gf_log (this->xl->name, GF_LOG_TRACE,
  468. "read (%s) in state %d (%s)",
  469. strerror (errno),
  470. SOCKET_PROTO_STATE_HEADER_COMING,
  471. this->peerinfo.identifier);
  472. goto unlock;
  473. }
  474. if (ret > 0) {
  475. gf_log (this->xl->name, GF_LOG_TRACE,
  476. "partial header read on NB socket.");
  477. goto unlock;
  478. }
  479. break;
  480. case SOCKET_PROTO_STATE_HEADER_CAME:
  481. hdr = &priv->incoming.header;
  482. ret = __socket_proto_validate_header (this, hdr,
  483. &size1, &size2);
  484. if (ret == -1) {
  485. gf_log (this->xl->name, GF_LOG_ERROR,
  486. "socket header validate failed (%s). "
  487. "possible mismatch of transport-type "
  488. "between server and client volumes, "
  489. "or version mismatch",
  490. this->peerinfo.identifier);
  491. goto unlock;
  492. }
  493. priv->incoming.hdrlen = size1;
  494. priv->incoming.buflen = size2;
  495. /* TODO: use mem-pool */
  496. priv->incoming.hdr_p = GF_MALLOC (size1,
  497. gf_common_mt_char);
  498. if (size2) {
  499. /* TODO: sanity check size2 < page size
  500. */
  501. iobuf = iobuf_get (this->xl->ctx->iobuf_pool);
  502. if (!iobuf) {
  503. gf_log (this->xl->name, GF_LOG_ERROR,
  504. "unable to allocate IO buffer "
  505. "for peer %s",
  506. this->peerinfo.identifier);
  507. ret = -ENOMEM;
  508. goto unlock;
  509. }
  510. priv->incoming.iobuf = iobuf;
  511. priv->incoming.buf_p = iobuf->ptr;
  512. }
  513. priv->incoming.vector[0].iov_base =
  514. priv->incoming.hdr_p;
  515. priv->incoming.vector[0].iov_len = size1;
  516. priv->incoming.vector[1].iov_base =
  517. priv->incoming.buf_p;
  518. priv->incoming.vector[1].iov_len = size2;
  519. priv->incoming.count = size2 ? 2 : 1;
  520. priv->incoming.pending_vector =
  521. priv->incoming.vector;
  522. priv->incoming.pending_count =
  523. priv->incoming.count;
  524. priv->incoming.state =
  525. SOCKET_PROTO_STATE_DATA_COMING;
  526. break;
  527. case SOCKET_PROTO_STATE_DATA_COMING:
  528. ret = __socket_readv (this,
  529. priv->incoming.pending_vector,
  530. priv->incoming.pending_count,
  531. &priv->incoming.pending_vector,
  532. &priv->incoming.pending_count);
  533. if (ret == 0) {
  534. priv->incoming.state =
  535. SOCKET_PROTO_STATE_DATA_CAME;
  536. break;
  537. }
  538. if (ret == -1) {
  539. gf_log (this->xl->name, GF_LOG_DEBUG,
  540. "read (%s) in state %d (%s)",
  541. strerror (errno),
  542. SOCKET_PROTO_STATE_DATA_COMING,
  543. this->peerinfo.identifier);
  544. goto unlock;
  545. }
  546. if (ret > 0) {
  547. gf_log (this->xl->name, GF_LOG_TRACE,
  548. "partial data read on NB socket");
  549. goto unlock;
  550. }
  551. break;
  552. case SOCKET_PROTO_STATE_DATA_CAME:
  553. memset (&priv->incoming.vector, 0,
  554. sizeof (priv->incoming.vector));
  555. priv->incoming.pending_vector = NULL;
  556. priv->incoming.pending_count = 0;
  557. priv->incoming.state = SOCKET_PROTO_STATE_COMPLETE;
  558. break;
  559. case SOCKET_PROTO_STATE_COMPLETE:
  560. /* not reached */
  561. break;
  562. default:
  563. gf_log (this->xl->name, GF_LOG_DEBUG,
  564. "undefined state reached: %d",
  565. priv->incoming.state);
  566. goto unlock;
  567. }
  568. }
  569. unlock:
  570. return ret;
  571. }
  572. static int
  573. socket_proto_state_machine (transport_t *this)
  574. {
  575. socket_private_t *priv = NULL;
  576. int ret = 0;
  577. priv = this->private;
  578. pthread_mutex_lock (&priv->lock);
  579. {
  580. ret = __socket_proto_state_machine (this);
  581. }
  582. pthread_mutex_unlock (&priv->lock);
  583. return ret;
  584. }
  585. static int
  586. socket_event_poll_in (transport_t *this)
  587. {
  588. int ret = -1;
  589. ret = socket_proto_state_machine (this);
  590. /* call POLLIN on xlator even if complete block is not received,
  591. just to keep the last_received timestamp ticking */
  592. if (ret == 0)
  593. ret = xlator_notify (this->xl, GF_EVENT_POLLIN, this);
  594. return ret;
  595. }
  596. static int
  597. socket_connect_finish (transport_t *this)
  598. {
  599. int ret = -1;
  600. socket_private_t *priv = NULL;
  601. int event = -1;
  602. char notify_xlator = 0;
  603. priv = this->private;
  604. pthread_mutex_lock (&priv->lock);
  605. {
  606. if (priv->connected)
  607. goto unlock;
  608. ret = __socket_connect_finish (priv->sock);
  609. if (ret == -1 && errno == EINPROGRESS)
  610. ret = 1;
  611. if (ret == -1 && errno != EINPROGRESS) {
  612. if (!priv->connect_finish_log) {
  613. gf_log (this->xl->name, GF_LOG_ERROR,
  614. "connection to %s failed (%s)",
  615. this->peerinfo.identifier,
  616. strerror (errno));
  617. priv->connect_finish_log = 1;
  618. }
  619. __socket_disconnect (this);
  620. notify_xlator = 1;
  621. event = GF_EVENT_POLLERR;
  622. goto unlock;
  623. }
  624. if (ret == 0) {
  625. notify_xlator = 1;
  626. this->myinfo.sockaddr_len =
  627. sizeof (this->myinfo.sockaddr);
  628. ret = getsockname (priv->sock,
  629. SA (&this->myinfo.sockaddr),
  630. &this->myinfo.sockaddr_len);
  631. if (ret == -1) {
  632. gf_log (this->xl->name, GF_LOG_DEBUG,
  633. "getsockname on (%d) failed (%s)",
  634. priv->sock, strerror (errno));
  635. __socket_disconnect (this);
  636. event = GF_EVENT_POLLERR;
  637. goto unlock;
  638. }
  639. priv->connected = 1;
  640. priv->connect_finish_log = 0;
  641. event = GF_EVENT_CHILD_UP;
  642. gf_get_transport_identifiers (this);
  643. }
  644. }
  645. unlock:
  646. pthread_mutex_unlock (&priv->lock);
  647. if (notify_xlator)
  648. xlator_notify (this->xl, event, this);
  649. return 0;
  650. }
  651. static int
  652. socket_event_handler (int fd, int idx, void *data,
  653. int poll_in, int poll_out, int poll_err)
  654. {
  655. transport_t *this = NULL;
  656. socket_private_t *priv = NULL;
  657. int ret = 0;
  658. this = data;
  659. priv = this->private;
  660. pthread_mutex_lock (&priv->lock);
  661. {
  662. priv->idx = idx;
  663. }
  664. pthread_mutex_unlock (&priv->lock);
  665. if (!priv->connected) {
  666. ret = socket_connect_finish (this);
  667. }
  668. if (!ret && poll_out) {
  669. ret = socket_event_poll_out (this);
  670. }
  671. if (!ret && poll_in) {
  672. ret = socket_event_poll_in (this);
  673. }
  674. if (ret < 0 || poll_err) {
  675. socket_event_poll_err (this);
  676. transport_unref (this);
  677. }
  678. return 0;
  679. }
  680. static int
  681. socket_server_event_handler (int fd, int idx, void *data,
  682. int poll_in, int poll_out, int poll_err)
  683. {
  684. transport_t *this = NULL;
  685. socket_private_t *priv = NULL;
  686. int ret = 0;
  687. int new_sock = -1;
  688. transport_t *new_trans = NULL;
  689. struct sockaddr_storage new_sockaddr = {0, };
  690. socklen_t addrlen = sizeof (new_sockaddr);
  691. socket_private_t *new_priv = NULL;
  692. glusterfs_ctx_t *ctx = NULL;
  693. this = data;
  694. priv = this->private;
  695. ctx = this->xl->ctx;
  696. pthread_mutex_lock (&priv->lock);
  697. {
  698. priv->idx = idx;
  699. if (poll_in) {
  700. new_sock = accept (priv->sock, SA (&new_sockaddr),
  701. &addrlen);
  702. if (new_sock == -1)
  703. goto unlock;
  704. if (!priv->bio) {
  705. ret = __socket_nonblock (new_sock);
  706. if (ret == -1) {
  707. gf_log (this->xl->name, GF_LOG_DEBUG,
  708. "NBIO on %d failed (%s)",
  709. new_sock, strerror (errno));
  710. close (new_sock);
  711. goto unlock;
  712. }
  713. }
  714. if (priv->nodelay) {
  715. ret = __socket_nodelay (new_sock);
  716. if (ret == -1) {
  717. gf_log (this->xl->name, GF_LOG_ERROR,
  718. "setsockopt() failed for "
  719. "NODELAY (%s)",
  720. strerror (errno));
  721. }
  722. }
  723. if (priv->keepalive) {
  724. ret = __socket_keepalive (new_sock,
  725. priv->keepaliveintvl);
  726. if (ret == -1)
  727. gf_log (this->xl->name, GF_LOG_ERROR,
  728. "Failed to set keep-alive: %s",
  729. strerror (errno));
  730. }
  731. new_trans = GF_CALLOC (1, sizeof (*new_trans),
  732. gf_common_mt_transport_t);
  733. new_trans->xl = this->xl;
  734. new_trans->fini = this->fini;
  735. memcpy (&new_trans->peerinfo.sockaddr, &new_sockaddr,
  736. addrlen);
  737. new_trans->peerinfo.sockaddr_len = addrlen;
  738. new_trans->myinfo.sockaddr_len =
  739. sizeof (new_trans->myinfo.sockaddr);
  740. ret = getsockname (new_sock,
  741. SA (&new_trans->myinfo.sockaddr),
  742. &new_trans->myinfo.sockaddr_len);
  743. if (ret == -1) {
  744. gf_log (this->xl->name, GF_LOG_DEBUG,
  745. "getsockname on %d failed (%s)",
  746. new_sock, strerror (errno));
  747. close (new_sock);
  748. goto unlock;
  749. }
  750. gf_get_transport_identifiers (new_trans);
  751. socket_init (new_trans);
  752. new_trans->ops = this->ops;
  753. new_trans->init = this->init;
  754. new_trans->fini = this->fini;
  755. new_priv = new_trans->private;
  756. pthread_mutex_lock (&new_priv->lock);
  757. {
  758. new_priv->sock = new_sock;
  759. new_priv->connected = 1;
  760. transport_ref (new_trans);
  761. new_priv->idx =
  762. event_register (ctx->event_pool,
  763. new_sock,
  764. socket_event_handler,
  765. new_trans, 1, 0);
  766. if (new_priv->idx == -1)
  767. ret = -1;
  768. }
  769. pthread_mutex_unlock (&new_priv->lock);
  770. }
  771. }
  772. unlock:
  773. pthread_mutex_unlock (&priv->lock);
  774. return ret;
  775. }
  776. static int
  777. socket_disconnect (transport_t *this)
  778. {
  779. socket_private_t *priv = NULL;
  780. int ret = -1;
  781. priv = this->private;
  782. pthread_mutex_lock (&priv->lock);
  783. {
  784. ret = __socket_disconnect (this);
  785. }
  786. pthread_mutex_unlock (&priv->lock);
  787. return ret;
  788. }
  789. static int
  790. socket_connect (transport_t *this)
  791. {
  792. int ret = -1;
  793. int sock = -1;
  794. socket_private_t *priv = NULL;
  795. struct sockaddr_storage sockaddr = {0, };
  796. socklen_t sockaddr_len = 0;
  797. glusterfs_ctx_t *ctx = NULL;
  798. sa_family_t sa_family = {0, };
  799. priv = this->private;
  800. ctx = this->xl->ctx;
  801. if (!priv) {
  802. gf_log (this->xl->name, GF_LOG_DEBUG,
  803. "connect() called on uninitialized transport");
  804. goto err;
  805. }
  806. pthread_mutex_lock (&priv->lock);
  807. {
  808. sock = priv->sock;
  809. }
  810. pthread_mutex_unlock (&priv->lock);
  811. if (sock != -1) {
  812. gf_log (this->xl->name, GF_LOG_TRACE,
  813. "connect () called on transport already connected");
  814. ret = 0;
  815. goto err;
  816. }
  817. ret = gf_socket_client_get_remote_sockaddr (this, SA (&sockaddr),
  818. &sockaddr_len, &sa_family);
  819. if (ret == -1) {
  820. /* logged inside client_get_remote_sockaddr */
  821. goto err;
  822. }
  823. pthread_mutex_lock (&priv->lock);
  824. {
  825. if (priv->sock != -1) {
  826. gf_log (this->xl->name, GF_LOG_TRACE,
  827. "connect() -- already connected");
  828. goto unlock;
  829. }
  830. memcpy (&this->peerinfo.sockaddr, &sockaddr, sockaddr_len);
  831. this->peerinfo.sockaddr_len = sockaddr_len;
  832. priv->sock = socket (sa_family, SOCK_STREAM, 0);
  833. if (priv->sock == -1) {
  834. gf_log (this->xl->name, GF_LOG_ERROR,
  835. "socket creation failed (%s)",
  836. strerror (errno));
  837. goto unlock;
  838. }
  839. /* Cant help if setting socket options fails. We can continue
  840. * working nonetheless.
  841. */
  842. if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
  843. &priv->windowsize,
  844. sizeof (priv->windowsize)) < 0) {
  845. gf_log (this->xl->name, GF_LOG_ERROR,
  846. "setting receive window size failed: %d: %d: "
  847. "%s", priv->sock, priv->windowsize,
  848. strerror (errno));
  849. }
  850. if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
  851. &priv->windowsize,
  852. sizeof (priv->windowsize)) < 0) {
  853. gf_log (this->xl->name, GF_LOG_ERROR,
  854. "setting send window size failed: %d: %d: "
  855. "%s", priv->sock, priv->windowsize,
  856. strerror (errno));
  857. }
  858. if (priv->nodelay && priv->lowlat) {
  859. ret = __socket_nodelay (priv->sock);
  860. if (ret == -1) {
  861. gf_log (this->xl->name, GF_LOG_ERROR,
  862. "setsockopt() failed for NODELAY (%s)",
  863. strerror (errno));
  864. }
  865. }
  866. if (!priv->bio) {
  867. ret = __socket_nonblock (priv->sock);
  868. if (ret == -1) {
  869. gf_log (this->xl->name, GF_LOG_ERROR,
  870. "NBIO on %d failed (%s)",
  871. priv->sock, strerror (errno));
  872. close (priv->sock);
  873. priv->sock = -1;
  874. goto unlock;
  875. }
  876. }
  877. if (priv->keepalive) {
  878. ret = __socket_keepalive (priv->sock,
  879. priv->keepaliveintvl);
  880. if (ret == -1)
  881. gf_log (this->xl->name, GF_LOG_ERROR,
  882. "Failed to set keep-alive: %s",
  883. strerror (errno));
  884. }
  885. SA (&this->myinfo.sockaddr)->sa_family =
  886. SA (&this->peerinfo.sockaddr)->sa_family;
  887. ret = gf_client_bind (this, SA (&this->myinfo.sockaddr),
  888. &this->myinfo.sockaddr_len, priv->sock);
  889. if (ret == -1) {
  890. gf_log (this->xl->name, GF_LOG_WARNING,
  891. "client bind failed: %s", strerror (errno));
  892. close (priv->sock);
  893. priv->sock = -1;
  894. goto unlock;
  895. }
  896. ret = connect (priv->sock, SA (&this->peerinfo.sockaddr),
  897. this->peerinfo.sockaddr_len);
  898. if (ret == -1 && errno != EINPROGRESS) {
  899. gf_log (this->xl->name, GF_LOG_ERROR,
  900. "connection attempt failed (%s)",
  901. strerror (errno));
  902. close (priv->sock);
  903. priv->sock = -1;
  904. goto unlock;
  905. }
  906. priv->connected = 0;
  907. transport_ref (this);
  908. priv->idx = event_register (ctx->event_pool, priv->sock,
  909. socket_event_handler, this, 1, 1);
  910. if (priv->idx == -1)
  911. ret = -1;
  912. }
  913. unlock:
  914. pthread_mutex_unlock (&priv->lock);
  915. err:
  916. return ret;
  917. }
  918. static int
  919. socket_listen (transport_t *this)
  920. {
  921. socket_private_t * priv = NULL;
  922. int ret = -1;
  923. int sock = -1;
  924. struct sockaddr_storage sockaddr;
  925. socklen_t sockaddr_len;
  926. peer_info_t *myinfo = NULL;
  927. glusterfs_ctx_t *ctx = NULL;
  928. sa_family_t sa_family = {0, };
  929. priv = this->private;
  930. myinfo = &this->myinfo;
  931. ctx = this->xl->ctx;
  932. pthread_mutex_lock (&priv->lock);
  933. {
  934. sock = priv->sock;
  935. }
  936. pthread_mutex_unlock (&priv->lock);
  937. if (sock != -1) {
  938. gf_log (this->xl->name, GF_LOG_DEBUG,
  939. "already listening");
  940. return ret;
  941. }
  942. ret = gf_socket_server_get_local_sockaddr (this, SA (&sockaddr),
  943. &sockaddr_len, &sa_family);
  944. if (ret == -1) {
  945. return ret;
  946. }
  947. pthread_mutex_lock (&priv->lock);
  948. {
  949. if (priv->sock != -1) {
  950. gf_log (this->xl->name, GF_LOG_DEBUG,
  951. "already listening");
  952. goto unlock;
  953. }
  954. memcpy (&myinfo->sockaddr, &sockaddr, sockaddr_len);
  955. myinfo->sockaddr_len = sockaddr_len;
  956. priv->sock = socket (sa_family, SOCK_STREAM, 0);
  957. if (priv->sock == -1) {
  958. gf_log (this->xl->name, GF_LOG_ERROR,
  959. "socket creation failed (%s)",
  960. strerror (errno));
  961. goto unlock;
  962. }
  963. /* Cant help if setting socket options fails. We can continue
  964. * working nonetheless.
  965. */
  966. if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
  967. &priv->windowsize,
  968. sizeof (priv->windowsize)) < 0) {
  969. gf_log (this->xl->name, GF_LOG_ERROR,
  970. "setting receive window size failed: %d: %d: "
  971. "%s", priv->sock, priv->windowsize,
  972. strerror (errno));
  973. }
  974. if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
  975. &priv->windowsize,
  976. sizeof (priv->windowsize)) < 0) {
  977. gf_log (this->xl->name, GF_LOG_ERROR,
  978. "setting send window size failed: %d: %d: "
  979. "%s", priv->sock, priv->windowsize,
  980. strerror (errno));
  981. }
  982. if (priv->nodelay) {
  983. ret = __socket_nodelay (priv->sock);
  984. if (ret == -1) {
  985. gf_log (this->xl->name, GF_LOG_ERROR,
  986. "setsockopt() failed for NODELAY (%s)",
  987. strerror (errno));
  988. }
  989. }
  990. if (!priv->bio) {
  991. ret = __socket_nonblock (priv->sock);
  992. if (ret == -1) {
  993. gf_log (this->xl->name, GF_LOG_ERROR,
  994. "NBIO on %d failed (%s)",
  995. priv->sock, strerror (errno));
  996. close (priv->sock);
  997. priv->sock = -1;
  998. goto unlock;
  999. }
  1000. }
  1001. ret = __socket_server_bind (this);
  1002. if (ret == -1) {
  1003. /* logged inside __socket_server_bind() */
  1004. close (priv->sock);
  1005. priv->sock = -1;
  1006. goto unlock;
  1007. }
  1008. ret = listen (priv->sock, 10);
  1009. if (ret == -1) {
  1010. gf_log (this->xl->name, GF_LOG_ERROR,
  1011. "could not set socket %d to listen mode (%s)",
  1012. priv->sock, strerror (errno));
  1013. close (priv->sock);
  1014. priv->sock = -1;
  1015. goto unlock;
  1016. }
  1017. transport_ref (this);
  1018. priv->idx = event_register (ctx->event_pool, priv->sock,
  1019. socket_server_event_handler,
  1020. this, 1, 0);
  1021. if (priv->idx == -1) {
  1022. gf_log (this->xl->name, GF_LOG_DEBUG,
  1023. "could not register socket %d with events",
  1024. priv->sock);
  1025. ret = -1;
  1026. close (priv->sock);
  1027. priv->sock = -1;
  1028. goto unlock;
  1029. }
  1030. }
  1031. unlock:
  1032. pthread_mutex_unlock (&priv->lock);
  1033. return ret;
  1034. }
  1035. static int
  1036. socket_receive (transport_t *this, char **hdr_p, size_t *hdrlen_p,
  1037. struct iobuf **iobuf_p)
  1038. {
  1039. socket_private_t *priv = NULL;
  1040. int ret = -1;
  1041. priv = this->private;
  1042. pthread_mutex_lock (&priv->lock);
  1043. {
  1044. if (priv->connected != 1) {
  1045. gf_log (this->xl->name, GF_LOG_DEBUG,
  1046. "socket not connected to receive");
  1047. goto unlock;
  1048. }
  1049. if (!hdr_p || !hdrlen_p || !iobuf_p) {
  1050. gf_log (this->xl->name, GF_LOG_DEBUG,
  1051. "bad parameters %p %p %p",
  1052. hdr_p, hdrlen_p, iobuf_p);
  1053. goto unlock;
  1054. }
  1055. if (priv->incoming.state == SOCKET_PROTO_STATE_COMPLETE) {
  1056. *hdr_p = priv->incoming.hdr_p;
  1057. *hdrlen_p = priv->incoming.hdrlen;
  1058. *iobuf_p = priv->incoming.iobuf;
  1059. memset (&priv->incoming, 0, sizeof (priv->incoming));
  1060. priv->incoming.state = SOCKET_PROTO_STATE_NADA;
  1061. ret = 0;
  1062. }
  1063. }
  1064. unlock:
  1065. pthread_mutex_unlock (&priv->lock);
  1066. return ret;
  1067. }
  1068. /* TODO: implement per transfer limit */
  1069. static int
  1070. socket_submit (transport_t *this, char *buf, int len,
  1071. struct iovec *vector, int count,
  1072. struct iobref *iobref)
  1073. {
  1074. socket_private_t *priv = NULL;
  1075. int ret = -1;
  1076. char need_poll_out = 0;
  1077. char need_append = 1;
  1078. struct ioq *entry = NULL;
  1079. glusterfs_ctx_t *ctx = NULL;
  1080. priv = this->private;
  1081. ctx = this->xl->ctx;
  1082. pthread_mutex_lock (&priv->lock);
  1083. {
  1084. if (priv->connected != 1) {
  1085. if (!priv->submit_log && !priv->connect_finish_log) {
  1086. gf_log (this->xl->name, GF_LOG_DEBUG,
  1087. "not connected (priv->connected = %d)",
  1088. priv->connected);
  1089. priv->submit_log = 1;
  1090. }
  1091. goto unlock;
  1092. }
  1093. priv->submit_log = 0;
  1094. entry = __socket_ioq_new (this, buf, len, vector, count, iobref);
  1095. if (!entry)
  1096. goto unlock;
  1097. if (list_empty (&priv->ioq)) {
  1098. ret = __socket_ioq_churn_entry (this, entry);
  1099. if (ret == 0)
  1100. need_append = 0;
  1101. if (ret > 0)
  1102. need_poll_out = 1;
  1103. }
  1104. if (need_append) {
  1105. list_add_tail (&entry->list, &priv->ioq);
  1106. ret = 0;
  1107. }
  1108. if (need_poll_out) {
  1109. /* first entry to wait. continue writing on POLLOUT */
  1110. priv->idx = event_select_on (ctx->event_pool,
  1111. priv->sock,
  1112. priv->idx, -1, 1);
  1113. }
  1114. }
  1115. unlock:
  1116. pthread_mutex_unlock (&priv->lock);
  1117. return ret;
  1118. }
  1119. struct transport_ops tops = {
  1120. .listen = socket_listen,
  1121. .connect = socket_connect,
  1122. .disconnect = socket_disconnect,
  1123. .submit = socket_submit,
  1124. .receive = socket_receive
  1125. };
  1126. static int
  1127. socket_init (transport_t *this)
  1128. {
  1129. socket_private_t *priv = NULL;
  1130. gf_boolean_t tmp_bool = 0;
  1131. uint64_t windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
  1132. char *optstr = NULL;
  1133. uint32_t keepalive = 0;
  1134. if (this->private) {
  1135. gf_log (this->xl->name, GF_LOG_DEBUG,
  1136. "double init attempted");
  1137. return -1;
  1138. }
  1139. priv = GF_CALLOC (1, sizeof (*priv),
  1140. gf_common_mt_socket_private_t);
  1141. if (!priv) {
  1142. gf_log (this->xl->name, GF_LOG_ERROR,
  1143. "calloc (1, %"GF_PRI_SIZET") returned NULL",
  1144. sizeof (*priv));
  1145. return -1;
  1146. }
  1147. pthread_mutex_init (&priv->lock, NULL);
  1148. priv->sock = -1;
  1149. priv->idx = -1;
  1150. priv->connected = -1;
  1151. INIT_LIST_HEAD (&priv->ioq);
  1152. if (dict_get (this->xl->options, "non-blocking-io")) {
  1153. optstr = data_to_str (dict_get (this->xl->options,
  1154. "non-blocking-io"));
  1155. if (gf_string2boolean (optstr, &tmp_bool) == -1) {
  1156. gf_log (this->xl->name, GF_LOG_ERROR,
  1157. "'non-blocking-io' takes only boolean options,"
  1158. " not taking any action");
  1159. tmp_bool = 1;
  1160. }
  1161. priv->bio = 0;
  1162. if (!tmp_bool) {
  1163. priv->bio = 1;
  1164. gf_log (this->xl->name, GF_LOG_WARNING,
  1165. "disabling non-blocking IO");
  1166. }
  1167. }
  1168. optstr = NULL;
  1169. // By default, we enable NODELAY
  1170. priv->nodelay = 1;
  1171. if (dict_get (this->xl->options, "transport.socket.nodelay")) {
  1172. optstr = data_to_str (dict_get (this->xl->options,
  1173. "transport.socket.nodelay"));
  1174. if (gf_string2boolean (optstr, &tmp_bool) == -1) {
  1175. gf_log (this->xl->name, GF_LOG_ERROR,
  1176. "'transport.socket.nodelay' takes only "
  1177. "boolean options, not taking any action");
  1178. tmp_bool = 1;
  1179. }
  1180. if (!tmp_bool) {
  1181. priv->nodelay = 0;
  1182. gf_log (this->xl->name, GF_LOG_DEBUG,
  1183. "disabling nodelay");
  1184. }
  1185. }
  1186. optstr = NULL;
  1187. if (dict_get_str (this->xl->options, "transport.window-size",
  1188. &optstr) == 0) {
  1189. if (gf_string2bytesize (optstr, &windowsize) != 0) {
  1190. gf_log (this->xl->name, GF_LOG_ERROR,
  1191. "invalid number format: %s", optstr);
  1192. return -1;
  1193. }
  1194. }
  1195. optstr = NULL;
  1196. if (dict_get_str (this->xl->options, "transport.socket.lowlat",
  1197. &optstr) == 0) {
  1198. priv->lowlat = 1;
  1199. }
  1200. /* Enable Keep-alive by default. */
  1201. priv->keepalive = 1;
  1202. priv->keepaliveintvl = GF_USE_DEFAULT_KEEPALIVE;
  1203. if (dict_get_str (this->xl->options, "transport.socket.keepalive",
  1204. &optstr) == 0) {
  1205. if (gf_string2boolean (optstr, &tmp_bool) == -1) {
  1206. gf_log (this->xl->name, GF_LOG_ERROR,
  1207. "'transport.socket.keepalive' takes only "
  1208. "boolean options, not taking any action");
  1209. tmp_bool = 1;
  1210. }
  1211. if (!tmp_bool)
  1212. priv->keepalive = 0;
  1213. }
  1214. if (dict_get_uint32 (this->xl->options,
  1215. "transport.socket.keepalive-interval",
  1216. &keepalive) == 0) {
  1217. priv->keepaliveintvl = keepalive;
  1218. }
  1219. priv->windowsize = (int)windowsize;
  1220. this->private = priv;
  1221. return 0;
  1222. }
  1223. void
  1224. fini (transport_t *this)
  1225. {
  1226. socket_private_t *priv = this->private;
  1227. if (!priv)
  1228. return;
  1229. this->private = NULL;
  1230. gf_log (this->xl->name, GF_LOG_TRACE,
  1231. "transport %p destroyed", this);
  1232. pthread_mutex_destroy (&priv->lock);
  1233. GF_FREE (priv);
  1234. }
  1235. int32_t
  1236. init (transport_t *this)
  1237. {
  1238. int ret = -1;
  1239. ret = socket_init (this);
  1240. if (ret == -1) {
  1241. gf_log (this->xl->name, GF_LOG_DEBUG, "socket_init() failed");
  1242. }
  1243. return ret;
  1244. }
  1245. struct volume_options options[] = {
  1246. { .key = {"remote-port",
  1247. "transport.remote-port",
  1248. "transport.socket.remote-port"},
  1249. .type = GF_OPTION_TYPE_INT
  1250. },
  1251. { .key = {"transport.socket.listen-port", "listen-port"},
  1252. .type = GF_OPTION_TYPE_INT
  1253. },
  1254. { .key = {"transport.socket.bind-address", "bind-address" },
  1255. .type = GF_OPTION_TYPE_INTERNET_ADDRESS
  1256. },
  1257. { .key = {"transport.socket.connect-path", "connect-path"},
  1258. .type = GF_OPTION_TYPE_ANY
  1259. },
  1260. { .key = {"transport.socket.bind-path", "bind-path"},
  1261. .type = GF_OPTION_TYPE_ANY
  1262. },
  1263. { .key = {"transport.socket.listen-path", "listen-path"},
  1264. .type = GF_OPTION_TYPE_ANY
  1265. },
  1266. { .key = { "transport.address-family",
  1267. "address-family" },
  1268. .value = {"inet", "inet6", "inet/inet6", "inet6/inet",
  1269. "unix", "inet-sdp" },
  1270. .type = GF_OPTION_TYPE_STR
  1271. },
  1272. { .key = {"non-blocking-io"},
  1273. .type = GF_OPTION_TYPE_BOOL
  1274. },
  1275. { .key = {"transport.window-size"},
  1276. .type = GF_OPTION_TYPE_SIZET,
  1277. .min = GF_MIN_SOCKET_WINDOW_SIZE,
  1278. .max = GF_MAX_SOCKET_WINDOW_SIZE,
  1279. },
  1280. { .key = {"transport.socket.nodelay"},
  1281. .type = GF_OPTION_TYPE_BOOL
  1282. },
  1283. { .key = {"transport.socket.lowlat"},
  1284. .type = GF_OPTION_TYPE_BOOL
  1285. },
  1286. { .key = {"transport.socket.keepalive"},
  1287. .type = GF_OPTION_TYPE_BOOL
  1288. },
  1289. { .key = {"transport.socket.keepalive-interval"},
  1290. .type = GF_OPTION_TYPE_INT
  1291. },
  1292. { .key = {NULL} }
  1293. };