PageRenderTime 62ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 1ms

/rpc/rpc-transport/socket/src/socket.c

https://github.com/dopry/glusterfs
C | 2808 lines | 2135 code | 576 blank | 97 comment | 425 complexity | b473361e0e485f7043b6ed31936626bf MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.0, GPL-2.0, Apache-2.0, BSD-3-Clause
  1. /*
  2. Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
  3. This file is part of GlusterFS.
  4. GlusterFS is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published
  6. by the Free Software Foundation; either version 3 of the License,
  7. or (at your option) any later version.
  8. GlusterFS is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program. If not, see
  14. <http://www.gnu.org/licenses/>.
  15. */
  16. #ifndef _CONFIG_H
  17. #define _CONFIG_H
  18. #include "config.h"
  19. #endif
  20. #include "socket.h"
  21. #include "name.h"
  22. #include "dict.h"
  23. #include "rpc-transport.h"
  24. #include "logging.h"
  25. #include "xlator.h"
  26. #include "byte-order.h"
  27. #include "common-utils.h"
  28. #include "compat-errno.h"
  29. /* ugly #includes below */
  30. #include "protocol-common.h"
  31. #include "glusterfs3-xdr.h"
  32. #include "xdr-nfs3.h"
  33. #include "rpcsvc.h"
  34. #include <fcntl.h>
  35. #include <errno.h>
  36. #include <netinet/tcp.h>
  37. #include <rpc/xdr.h>
  38. #define GF_LOG_ERRNO(errno) ((errno == ENOTCONN) ? GF_LOG_DEBUG : GF_LOG_ERROR)
  39. #define SA(ptr) ((struct sockaddr *)ptr)
  40. #define __socket_proto_reset_pending(priv) do { \
  41. memset (&priv->incoming.frag.vector, 0, \
  42. sizeof (priv->incoming.frag.vector)); \
  43. priv->incoming.frag.pending_vector = \
  44. &priv->incoming.frag.vector; \
  45. priv->incoming.frag.pending_vector->iov_base = \
  46. priv->incoming.frag.fragcurrent; \
  47. priv->incoming.pending_vector = \
  48. priv->incoming.frag.pending_vector; \
  49. } while (0);
  50. #define __socket_proto_update_pending(priv) \
  51. do { \
  52. uint32_t remaining_fragsize = 0; \
  53. if (priv->incoming.frag.pending_vector->iov_len == 0) { \
  54. remaining_fragsize = RPC_FRAGSIZE (priv->incoming.fraghdr) \
  55. - priv->incoming.frag.bytes_read; \
  56. \
  57. priv->incoming.frag.pending_vector->iov_len = \
  58. remaining_fragsize > priv->incoming.frag.remaining_size \
  59. ? priv->incoming.frag.remaining_size : remaining_fragsize; \
  60. \
  61. priv->incoming.frag.remaining_size -= \
  62. priv->incoming.frag.pending_vector->iov_len; \
  63. } \
  64. } while (0);
  65. #define __socket_proto_update_priv_after_read(priv, ret, bytes_read) \
  66. { \
  67. priv->incoming.frag.fragcurrent += bytes_read; \
  68. priv->incoming.frag.bytes_read += bytes_read; \
  69. \
  70. if ((ret > 0) || (priv->incoming.frag.remaining_size != 0)) { \
  71. if (priv->incoming.frag.remaining_size != 0 && ret == 0) { \
  72. __socket_proto_reset_pending (priv); \
  73. } \
  74. \
  75. gf_log (this->name, GF_LOG_TRACE, "partial read on non-blocking socket"); \
  76. \
  77. break; \
  78. } \
  79. }
  80. #define __socket_proto_init_pending(priv, size) \
  81. do { \
  82. uint32_t remaining_fragsize = 0; \
  83. remaining_fragsize = RPC_FRAGSIZE (priv->incoming.fraghdr) \
  84. - priv->incoming.frag.bytes_read; \
  85. \
  86. __socket_proto_reset_pending (priv); \
  87. \
  88. priv->incoming.frag.pending_vector->iov_len = \
  89. remaining_fragsize > size ? size : remaining_fragsize; \
  90. \
  91. priv->incoming.frag.remaining_size = \
  92. size - priv->incoming.frag.pending_vector->iov_len; \
  93. \
  94. } while (0);
  95. /* This will be used in a switch case and breaks from the switch case if all
  96. * the pending data is not read.
  97. */
  98. #define __socket_proto_read(priv, ret) \
  99. { \
  100. size_t bytes_read = 0; \
  101. \
  102. __socket_proto_update_pending (priv); \
  103. \
  104. ret = __socket_readv (this, \
  105. priv->incoming.pending_vector, 1, \
  106. &priv->incoming.pending_vector, \
  107. &priv->incoming.pending_count, \
  108. &bytes_read); \
  109. if (ret == -1) { \
  110. gf_log (this->name, GF_LOG_WARNING, \
  111. "reading from socket failed. Error (%s), " \
  112. "peer (%s)", strerror (errno), \
  113. this->peerinfo.identifier); \
  114. break; \
  115. } \
  116. __socket_proto_update_priv_after_read (priv, ret, bytes_read); \
  117. }
  118. int socket_init (rpc_transport_t *this);
  119. /*
  120. * return value:
  121. * 0 = success (completed)
  122. * -1 = error
  123. * > 0 = incomplete
  124. */
  125. int
  126. __socket_rwv (rpc_transport_t *this, struct iovec *vector, int count,
  127. struct iovec **pending_vector, int *pending_count, size_t *bytes,
  128. int write)
  129. {
  130. socket_private_t *priv = NULL;
  131. int sock = -1;
  132. int ret = -1;
  133. struct iovec *opvector = NULL;
  134. int opcount = 0;
  135. int moved = 0;
  136. GF_VALIDATE_OR_GOTO ("socket", this, out);
  137. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  138. priv = this->private;
  139. sock = priv->sock;
  140. opvector = vector;
  141. opcount = count;
  142. if (bytes != NULL) {
  143. *bytes = 0;
  144. }
  145. while (opcount) {
  146. if (write) {
  147. ret = writev (sock, opvector, opcount);
  148. if (ret == 0 || (ret == -1 && errno == EAGAIN)) {
  149. /* done for now */
  150. break;
  151. }
  152. this->total_bytes_write += ret;
  153. } else {
  154. ret = readv (sock, opvector, opcount);
  155. if (ret == -1 && errno == EAGAIN) {
  156. /* done for now */
  157. break;
  158. }
  159. this->total_bytes_read += ret;
  160. }
  161. if (ret == 0) {
  162. /* Mostly due to 'umount' in client */
  163. gf_log (this->name, GF_LOG_DEBUG,
  164. "EOF from peer %s", this->peerinfo.identifier);
  165. opcount = -1;
  166. errno = ENOTCONN;
  167. break;
  168. }
  169. if (ret == -1) {
  170. if (errno == EINTR)
  171. continue;
  172. gf_log (this->name, GF_LOG_WARNING,
  173. "%s failed (%s)", write ? "writev" : "readv",
  174. strerror (errno));
  175. opcount = -1;
  176. break;
  177. }
  178. if (bytes != NULL) {
  179. *bytes += ret;
  180. }
  181. moved = 0;
  182. while (moved < ret) {
  183. if ((ret - moved) >= opvector[0].iov_len) {
  184. moved += opvector[0].iov_len;
  185. opvector++;
  186. opcount--;
  187. } else {
  188. opvector[0].iov_len -= (ret - moved);
  189. opvector[0].iov_base += (ret - moved);
  190. moved += (ret - moved);
  191. }
  192. while (opcount && !opvector[0].iov_len) {
  193. opvector++;
  194. opcount--;
  195. }
  196. }
  197. }
  198. if (pending_vector)
  199. *pending_vector = opvector;
  200. if (pending_count)
  201. *pending_count = opcount;
  202. out:
  203. return opcount;
  204. }
  205. int
  206. __socket_readv (rpc_transport_t *this, struct iovec *vector, int count,
  207. struct iovec **pending_vector, int *pending_count,
  208. size_t *bytes)
  209. {
  210. int ret = -1;
  211. ret = __socket_rwv (this, vector, count,
  212. pending_vector, pending_count, bytes, 0);
  213. return ret;
  214. }
  215. int
  216. __socket_writev (rpc_transport_t *this, struct iovec *vector, int count,
  217. struct iovec **pending_vector, int *pending_count)
  218. {
  219. int ret = -1;
  220. ret = __socket_rwv (this, vector, count,
  221. pending_vector, pending_count, NULL, 1);
  222. return ret;
  223. }
  224. int
  225. __socket_disconnect (rpc_transport_t *this)
  226. {
  227. socket_private_t *priv = NULL;
  228. int ret = -1;
  229. GF_VALIDATE_OR_GOTO ("socket", this, out);
  230. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  231. priv = this->private;
  232. if (priv->sock != -1) {
  233. priv->connected = -1;
  234. ret = shutdown (priv->sock, SHUT_RDWR);
  235. if (ret) {
  236. /* its already disconnected.. no need to understand
  237. why it failed to shutdown in normal cases */
  238. gf_log (this->name, GF_LOG_DEBUG,
  239. "shutdown() returned %d. %s",
  240. ret, strerror (errno));
  241. }
  242. }
  243. out:
  244. return ret;
  245. }
  246. int
  247. __socket_server_bind (rpc_transport_t *this)
  248. {
  249. socket_private_t *priv = NULL;
  250. int ret = -1;
  251. int opt = 1;
  252. int reuse_check_sock = -1;
  253. struct sockaddr_storage unix_addr = {0};
  254. GF_VALIDATE_OR_GOTO ("socket", this, out);
  255. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  256. priv = this->private;
  257. ret = setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR,
  258. &opt, sizeof (opt));
  259. if (ret == -1) {
  260. gf_log (this->name, GF_LOG_ERROR,
  261. "setsockopt() for SO_REUSEADDR failed (%s)",
  262. strerror (errno));
  263. }
  264. /* reuse-address doesn't work for unix type sockets */
  265. if (AF_UNIX == SA (&this->myinfo.sockaddr)->sa_family) {
  266. memcpy (&unix_addr, SA (&this->myinfo.sockaddr),
  267. this->myinfo.sockaddr_len);
  268. reuse_check_sock = socket (AF_UNIX, SOCK_STREAM, 0);
  269. if (reuse_check_sock > 0) {
  270. ret = connect (reuse_check_sock, SA (&unix_addr),
  271. this->myinfo.sockaddr_len);
  272. if ((ret == -1) && (ECONNREFUSED == errno)) {
  273. unlink (((struct sockaddr_un*)&unix_addr)->sun_path);
  274. }
  275. close (reuse_check_sock);
  276. }
  277. }
  278. ret = bind (priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
  279. this->myinfo.sockaddr_len);
  280. if (ret == -1) {
  281. gf_log (this->name, GF_LOG_ERROR,
  282. "binding to %s failed: %s",
  283. this->myinfo.identifier, strerror (errno));
  284. if (errno == EADDRINUSE) {
  285. gf_log (this->name, GF_LOG_ERROR,
  286. "Port is already in use");
  287. }
  288. }
  289. out:
  290. return ret;
  291. }
  292. int
  293. __socket_nonblock (int fd)
  294. {
  295. int flags = 0;
  296. int ret = -1;
  297. flags = fcntl (fd, F_GETFL);
  298. if (flags != -1)
  299. ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
  300. return ret;
  301. }
  302. int
  303. __socket_nodelay (int fd)
  304. {
  305. int on = 1;
  306. int ret = -1;
  307. ret = setsockopt (fd, IPPROTO_TCP, TCP_NODELAY,
  308. &on, sizeof (on));
  309. if (!ret)
  310. gf_log (THIS->name, GF_LOG_TRACE,
  311. "NODELAY enabled for socket %d", fd);
  312. return ret;
  313. }
  314. static int
  315. __socket_keepalive (int fd, int keepalive_intvl, int keepalive_idle)
  316. {
  317. int on = 1;
  318. int ret = -1;
  319. ret = setsockopt (fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof (on));
  320. if (ret == -1) {
  321. gf_log ("socket", GF_LOG_WARNING,
  322. "failed to set keep alive option on socket %d", fd);
  323. goto err;
  324. }
  325. if (keepalive_intvl == GF_USE_DEFAULT_KEEPALIVE)
  326. goto done;
  327. #if !defined(GF_LINUX_HOST_OS) && !defined(__NetBSD__)
  328. #ifdef GF_SOLARIS_HOST_OS
  329. ret = setsockopt (fd, SOL_SOCKET, SO_KEEPALIVE, &keepalive_intvl,
  330. sizeof (keepalive_intvl));
  331. #else
  332. ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepalive_intvl,
  333. sizeof (keepalive_intvl));
  334. #endif
  335. if (ret == -1) {
  336. gf_log ("socket", GF_LOG_WARNING,
  337. "failed to set keep alive interval on socket %d", fd);
  338. goto err;
  339. }
  340. #else
  341. ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepalive_idle,
  342. sizeof (keepalive_intvl));
  343. if (ret == -1) {
  344. gf_log ("socket", GF_LOG_WARNING,
  345. "failed to set keep idle on socket %d", fd);
  346. goto err;
  347. }
  348. ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepalive_intvl,
  349. sizeof (keepalive_intvl));
  350. if (ret == -1) {
  351. gf_log ("socket", GF_LOG_WARNING,
  352. "failed to set keep alive interval on socket %d", fd);
  353. goto err;
  354. }
  355. #endif
  356. done:
  357. gf_log (THIS->name, GF_LOG_TRACE, "Keep-alive enabled for socket %d, interval "
  358. "%d, idle: %d", fd, keepalive_intvl, keepalive_idle);
  359. err:
  360. return ret;
  361. }
  362. int
  363. __socket_connect_finish (int fd)
  364. {
  365. int ret = -1;
  366. int optval = 0;
  367. socklen_t optlen = sizeof (int);
  368. ret = getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&optval, &optlen);
  369. if (ret == 0 && optval) {
  370. errno = optval;
  371. ret = -1;
  372. }
  373. return ret;
  374. }
  375. void
  376. __socket_reset (rpc_transport_t *this)
  377. {
  378. socket_private_t *priv = NULL;
  379. GF_VALIDATE_OR_GOTO ("socket", this, out);
  380. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  381. priv = this->private;
  382. /* TODO: use mem-pool on incoming data */
  383. if (priv->incoming.iobref) {
  384. iobref_unref (priv->incoming.iobref);
  385. priv->incoming.iobref = NULL;
  386. }
  387. if (priv->incoming.iobuf) {
  388. iobuf_unref (priv->incoming.iobuf);
  389. }
  390. if (priv->incoming.request_info != NULL) {
  391. GF_FREE (priv->incoming.request_info);
  392. }
  393. memset (&priv->incoming, 0, sizeof (priv->incoming));
  394. event_unregister (this->ctx->event_pool, priv->sock, priv->idx);
  395. close (priv->sock);
  396. priv->sock = -1;
  397. priv->idx = -1;
  398. priv->connected = -1;
  399. out:
  400. return;
  401. }
  402. void
  403. socket_set_lastfrag (uint32_t *fragsize) {
  404. (*fragsize) |= 0x80000000U;
  405. }
  406. void
  407. socket_set_frag_header_size (uint32_t size, char *haddr)
  408. {
  409. size = htonl (size);
  410. memcpy (haddr, &size, sizeof (size));
  411. }
  412. void
  413. socket_set_last_frag_header_size (uint32_t size, char *haddr)
  414. {
  415. socket_set_lastfrag (&size);
  416. socket_set_frag_header_size (size, haddr);
  417. }
  418. struct ioq *
  419. __socket_ioq_new (rpc_transport_t *this, rpc_transport_msg_t *msg)
  420. {
  421. struct ioq *entry = NULL;
  422. int count = 0;
  423. uint32_t size = 0;
  424. GF_VALIDATE_OR_GOTO ("socket", this, out);
  425. /* TODO: use mem-pool */
  426. entry = GF_CALLOC (1, sizeof (*entry), gf_common_mt_ioq);
  427. if (!entry)
  428. return NULL;
  429. count = msg->rpchdrcount + msg->proghdrcount + msg->progpayloadcount;
  430. GF_ASSERT (count <= (MAX_IOVEC - 1));
  431. size = iov_length (msg->rpchdr, msg->rpchdrcount)
  432. + iov_length (msg->proghdr, msg->proghdrcount)
  433. + iov_length (msg->progpayload, msg->progpayloadcount);
  434. if (size > RPC_MAX_FRAGMENT_SIZE) {
  435. gf_log (this->name, GF_LOG_ERROR,
  436. "msg size (%u) bigger than the maximum allowed size on "
  437. "sockets (%u)", size, RPC_MAX_FRAGMENT_SIZE);
  438. GF_FREE (entry);
  439. return NULL;
  440. }
  441. socket_set_last_frag_header_size (size, (char *)&entry->fraghdr);
  442. entry->vector[0].iov_base = (char *)&entry->fraghdr;
  443. entry->vector[0].iov_len = sizeof (entry->fraghdr);
  444. entry->count = 1;
  445. if (msg->rpchdr != NULL) {
  446. memcpy (&entry->vector[1], msg->rpchdr,
  447. sizeof (struct iovec) * msg->rpchdrcount);
  448. entry->count += msg->rpchdrcount;
  449. }
  450. if (msg->proghdr != NULL) {
  451. memcpy (&entry->vector[entry->count], msg->proghdr,
  452. sizeof (struct iovec) * msg->proghdrcount);
  453. entry->count += msg->proghdrcount;
  454. }
  455. if (msg->progpayload != NULL) {
  456. memcpy (&entry->vector[entry->count], msg->progpayload,
  457. sizeof (struct iovec) * msg->progpayloadcount);
  458. entry->count += msg->progpayloadcount;
  459. }
  460. entry->pending_vector = entry->vector;
  461. entry->pending_count = entry->count;
  462. if (msg->iobref != NULL)
  463. entry->iobref = iobref_ref (msg->iobref);
  464. INIT_LIST_HEAD (&entry->list);
  465. out:
  466. return entry;
  467. }
  468. void
  469. __socket_ioq_entry_free (struct ioq *entry)
  470. {
  471. GF_VALIDATE_OR_GOTO ("socket", entry, out);
  472. list_del_init (&entry->list);
  473. if (entry->iobref)
  474. iobref_unref (entry->iobref);
  475. /* TODO: use mem-pool */
  476. GF_FREE (entry);
  477. out:
  478. return;
  479. }
  480. void
  481. __socket_ioq_flush (rpc_transport_t *this)
  482. {
  483. socket_private_t *priv = NULL;
  484. struct ioq *entry = NULL;
  485. GF_VALIDATE_OR_GOTO ("socket", this, out);
  486. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  487. priv = this->private;
  488. while (!list_empty (&priv->ioq)) {
  489. entry = priv->ioq_next;
  490. __socket_ioq_entry_free (entry);
  491. }
  492. out:
  493. return;
  494. }
  495. int
  496. __socket_ioq_churn_entry (rpc_transport_t *this, struct ioq *entry)
  497. {
  498. int ret = -1;
  499. ret = __socket_writev (this, entry->pending_vector,
  500. entry->pending_count,
  501. &entry->pending_vector,
  502. &entry->pending_count);
  503. if (ret == 0) {
  504. /* current entry was completely written */
  505. GF_ASSERT (entry->pending_count == 0);
  506. __socket_ioq_entry_free (entry);
  507. }
  508. return ret;
  509. }
  510. int
  511. __socket_ioq_churn (rpc_transport_t *this)
  512. {
  513. socket_private_t *priv = NULL;
  514. int ret = 0;
  515. struct ioq *entry = NULL;
  516. GF_VALIDATE_OR_GOTO ("socket", this, out);
  517. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  518. priv = this->private;
  519. while (!list_empty (&priv->ioq)) {
  520. /* pick next entry */
  521. entry = priv->ioq_next;
  522. ret = __socket_ioq_churn_entry (this, entry);
  523. if (ret != 0)
  524. break;
  525. }
  526. if (list_empty (&priv->ioq)) {
  527. /* all pending writes done, not interested in POLLOUT */
  528. priv->idx = event_select_on (this->ctx->event_pool,
  529. priv->sock, priv->idx, -1, 0);
  530. }
  531. out:
  532. return ret;
  533. }
  534. int
  535. socket_event_poll_err (rpc_transport_t *this)
  536. {
  537. socket_private_t *priv = NULL;
  538. int ret = -1;
  539. GF_VALIDATE_OR_GOTO ("socket", this, out);
  540. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  541. priv = this->private;
  542. pthread_mutex_lock (&priv->lock);
  543. {
  544. __socket_ioq_flush (this);
  545. __socket_reset (this);
  546. }
  547. pthread_mutex_unlock (&priv->lock);
  548. rpc_transport_notify (this, RPC_TRANSPORT_DISCONNECT, this);
  549. out:
  550. return ret;
  551. }
  552. int
  553. socket_event_poll_out (rpc_transport_t *this)
  554. {
  555. socket_private_t *priv = NULL;
  556. int ret = -1;
  557. GF_VALIDATE_OR_GOTO ("socket", this, out);
  558. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  559. priv = this->private;
  560. pthread_mutex_lock (&priv->lock);
  561. {
  562. if (priv->connected == 1) {
  563. ret = __socket_ioq_churn (this);
  564. if (ret == -1) {
  565. __socket_disconnect (this);
  566. }
  567. }
  568. }
  569. pthread_mutex_unlock (&priv->lock);
  570. ret = rpc_transport_notify (this, RPC_TRANSPORT_MSG_SENT, NULL);
  571. out:
  572. return ret;
  573. }
  574. inline int
  575. __socket_read_simple_msg (rpc_transport_t *this)
  576. {
  577. socket_private_t *priv = NULL;
  578. int ret = 0;
  579. uint32_t remaining_size = 0;
  580. size_t bytes_read = 0;
  581. GF_VALIDATE_OR_GOTO ("socket", this, out);
  582. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  583. priv = this->private;
  584. switch (priv->incoming.frag.simple_state) {
  585. case SP_STATE_SIMPLE_MSG_INIT:
  586. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  587. - priv->incoming.frag.bytes_read;
  588. __socket_proto_init_pending (priv, remaining_size);
  589. priv->incoming.frag.simple_state =
  590. SP_STATE_READING_SIMPLE_MSG;
  591. /* fall through */
  592. case SP_STATE_READING_SIMPLE_MSG:
  593. ret = 0;
  594. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  595. - priv->incoming.frag.bytes_read;
  596. if (remaining_size > 0) {
  597. ret = __socket_readv (this,
  598. priv->incoming.pending_vector, 1,
  599. &priv->incoming.pending_vector,
  600. &priv->incoming.pending_count,
  601. &bytes_read);
  602. }
  603. if (ret == -1) {
  604. gf_log (this->name, GF_LOG_WARNING,
  605. "reading from socket failed. Error (%s), "
  606. "peer (%s)", strerror (errno),
  607. this->peerinfo.identifier);
  608. break;
  609. }
  610. priv->incoming.frag.bytes_read += bytes_read;
  611. priv->incoming.frag.fragcurrent += bytes_read;
  612. if (ret > 0) {
  613. gf_log (this->name, GF_LOG_TRACE,
  614. "partial read on non-blocking socket.");
  615. break;
  616. }
  617. if (ret == 0) {
  618. priv->incoming.frag.simple_state
  619. = SP_STATE_SIMPLE_MSG_INIT;
  620. }
  621. }
  622. out:
  623. return ret;
  624. }
  625. inline int
  626. __socket_read_simple_request (rpc_transport_t *this)
  627. {
  628. return __socket_read_simple_msg (this);
  629. }
  630. #define rpc_cred_addr(buf) (buf + RPC_MSGTYPE_SIZE + RPC_CALL_BODY_SIZE - 4)
  631. #define rpc_verf_addr(fragcurrent) (fragcurrent - 4)
  632. #define rpc_msgtype_addr(buf) (buf + 4)
  633. #define rpc_prognum_addr(buf) (buf + RPC_MSGTYPE_SIZE + 4)
  634. #define rpc_progver_addr(buf) (buf + RPC_MSGTYPE_SIZE + 8)
  635. #define rpc_procnum_addr(buf) (buf + RPC_MSGTYPE_SIZE + 12)
  636. inline int
  637. __socket_read_vectored_request (rpc_transport_t *this, rpcsvc_vector_sizer vector_sizer)
  638. {
  639. socket_private_t *priv = NULL;
  640. int ret = 0;
  641. uint32_t credlen = 0, verflen = 0;
  642. char *addr = NULL;
  643. struct iobuf *iobuf = NULL;
  644. uint32_t remaining_size = 0;
  645. ssize_t readsize = 0;
  646. size_t size = 0;
  647. GF_VALIDATE_OR_GOTO ("socket", this, out);
  648. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  649. priv = this->private;
  650. switch (priv->incoming.frag.call_body.request.vector_state) {
  651. case SP_STATE_VECTORED_REQUEST_INIT:
  652. priv->incoming.frag.call_body.request.vector_sizer_state = 0;
  653. addr = rpc_cred_addr (iobuf_ptr (priv->incoming.iobuf));
  654. /* also read verf flavour and verflen */
  655. credlen = ntoh32 (*((uint32_t *)addr))
  656. + RPC_AUTH_FLAVOUR_N_LENGTH_SIZE;
  657. __socket_proto_init_pending (priv, credlen);
  658. priv->incoming.frag.call_body.request.vector_state =
  659. SP_STATE_READING_CREDBYTES;
  660. /* fall through */
  661. case SP_STATE_READING_CREDBYTES:
  662. __socket_proto_read (priv, ret);
  663. priv->incoming.frag.call_body.request.vector_state =
  664. SP_STATE_READ_CREDBYTES;
  665. /* fall through */
  666. case SP_STATE_READ_CREDBYTES:
  667. addr = rpc_verf_addr (priv->incoming.frag.fragcurrent);
  668. verflen = ntoh32 (*((uint32_t *)addr));
  669. if (verflen == 0) {
  670. priv->incoming.frag.call_body.request.vector_state
  671. = SP_STATE_READ_VERFBYTES;
  672. goto sp_state_read_verfbytes;
  673. }
  674. __socket_proto_init_pending (priv, verflen);
  675. priv->incoming.frag.call_body.request.vector_state
  676. = SP_STATE_READING_VERFBYTES;
  677. /* fall through */
  678. case SP_STATE_READING_VERFBYTES:
  679. __socket_proto_read (priv, ret);
  680. priv->incoming.frag.call_body.request.vector_state =
  681. SP_STATE_READ_VERFBYTES;
  682. /* fall through */
  683. case SP_STATE_READ_VERFBYTES:
  684. sp_state_read_verfbytes:
  685. priv->incoming.frag.call_body.request.vector_sizer_state =
  686. vector_sizer (priv->incoming.frag.call_body.request.vector_sizer_state,
  687. &readsize,
  688. priv->incoming.frag.fragcurrent);
  689. __socket_proto_init_pending (priv, readsize);
  690. priv->incoming.frag.call_body.request.vector_state
  691. = SP_STATE_READING_PROGHDR;
  692. /* fall through */
  693. case SP_STATE_READING_PROGHDR:
  694. __socket_proto_read (priv, ret);
  695. sp_state_reading_proghdr:
  696. priv->incoming.frag.call_body.request.vector_sizer_state =
  697. vector_sizer (priv->incoming.frag.call_body.request.vector_sizer_state,
  698. &readsize,
  699. priv->incoming.frag.fragcurrent);
  700. if (readsize == 0) {
  701. priv->incoming.frag.call_body.request.vector_state =
  702. SP_STATE_READ_PROGHDR;
  703. } else {
  704. __socket_proto_init_pending (priv, readsize);
  705. __socket_proto_read (priv, ret);
  706. goto sp_state_reading_proghdr;
  707. }
  708. case SP_STATE_READ_PROGHDR:
  709. if (priv->incoming.payload_vector.iov_base == NULL) {
  710. size = RPC_FRAGSIZE (priv->incoming.fraghdr) -
  711. priv->incoming.frag.bytes_read;
  712. iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
  713. if (!iobuf) {
  714. ret = -1;
  715. break;
  716. }
  717. if (priv->incoming.iobref == NULL) {
  718. priv->incoming.iobref = iobref_new ();
  719. if (priv->incoming.iobref == NULL) {
  720. ret = -1;
  721. iobuf_unref (iobuf);
  722. break;
  723. }
  724. }
  725. iobref_add (priv->incoming.iobref, iobuf);
  726. iobuf_unref (iobuf);
  727. priv->incoming.payload_vector.iov_base
  728. = iobuf_ptr (iobuf);
  729. priv->incoming.frag.fragcurrent = iobuf_ptr (iobuf);
  730. }
  731. priv->incoming.frag.call_body.request.vector_state =
  732. SP_STATE_READING_PROG;
  733. /* fall through */
  734. case SP_STATE_READING_PROG:
  735. /* now read the remaining rpc msg into buffer pointed by
  736. * fragcurrent
  737. */
  738. ret = __socket_read_simple_msg (this);
  739. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  740. - priv->incoming.frag.bytes_read;
  741. if ((ret == -1)
  742. || ((ret == 0)
  743. && (remaining_size == 0)
  744. && RPC_LASTFRAG (priv->incoming.fraghdr))) {
  745. priv->incoming.frag.call_body.request.vector_state
  746. = SP_STATE_VECTORED_REQUEST_INIT;
  747. priv->incoming.payload_vector.iov_len
  748. = (unsigned long)priv->incoming.frag.fragcurrent
  749. - (unsigned long)
  750. priv->incoming.payload_vector.iov_base;
  751. }
  752. break;
  753. }
  754. out:
  755. return ret;
  756. }
  757. inline int
  758. __socket_read_request (rpc_transport_t *this)
  759. {
  760. socket_private_t *priv = NULL;
  761. uint32_t prognum = 0, procnum = 0, progver = 0;
  762. uint32_t remaining_size = 0;
  763. int ret = -1;
  764. char *buf = NULL;
  765. rpcsvc_vector_sizer vector_sizer = NULL;
  766. GF_VALIDATE_OR_GOTO ("socket", this, out);
  767. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  768. priv = this->private;
  769. switch (priv->incoming.frag.call_body.request.header_state) {
  770. case SP_STATE_REQUEST_HEADER_INIT:
  771. __socket_proto_init_pending (priv, RPC_CALL_BODY_SIZE);
  772. priv->incoming.frag.call_body.request.header_state
  773. = SP_STATE_READING_RPCHDR1;
  774. /* fall through */
  775. case SP_STATE_READING_RPCHDR1:
  776. __socket_proto_read (priv, ret);
  777. priv->incoming.frag.call_body.request.header_state =
  778. SP_STATE_READ_RPCHDR1;
  779. /* fall through */
  780. case SP_STATE_READ_RPCHDR1:
  781. buf = rpc_prognum_addr (iobuf_ptr (priv->incoming.iobuf));
  782. prognum = ntoh32 (*((uint32_t *)buf));
  783. buf = rpc_progver_addr (iobuf_ptr (priv->incoming.iobuf));
  784. progver = ntoh32 (*((uint32_t *)buf));
  785. buf = rpc_procnum_addr (iobuf_ptr (priv->incoming.iobuf));
  786. procnum = ntoh32 (*((uint32_t *)buf));
  787. if (this->listener) {
  788. /* this check is needed as rpcsvc and rpc-clnt actor structures are
  789. * not same */
  790. vector_sizer = rpcsvc_get_program_vector_sizer ((rpcsvc_t *)this->mydata,
  791. prognum, progver, procnum);
  792. }
  793. if (vector_sizer) {
  794. ret = __socket_read_vectored_request (this, vector_sizer);
  795. } else {
  796. ret = __socket_read_simple_request (this);
  797. }
  798. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  799. - priv->incoming.frag.bytes_read;
  800. if ((ret == -1)
  801. || ((ret == 0)
  802. && (remaining_size == 0)
  803. && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
  804. priv->incoming.frag.call_body.request.header_state =
  805. SP_STATE_REQUEST_HEADER_INIT;
  806. }
  807. break;
  808. }
  809. out:
  810. return ret;
  811. }
  812. inline int
  813. __socket_read_accepted_successful_reply (rpc_transport_t *this)
  814. {
  815. socket_private_t *priv = NULL;
  816. int ret = 0;
  817. struct iobuf *iobuf = NULL;
  818. uint32_t gluster_read_rsp_hdr_len = 0;
  819. gfs3_read_rsp read_rsp = {0, };
  820. size_t size = 0;
  821. GF_VALIDATE_OR_GOTO ("socket", this, out);
  822. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  823. priv = this->private;
  824. switch (priv->incoming.frag.call_body.reply.accepted_success_state) {
  825. case SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT:
  826. gluster_read_rsp_hdr_len = xdr_sizeof ((xdrproc_t) xdr_gfs3_read_rsp,
  827. &read_rsp);
  828. if (gluster_read_rsp_hdr_len == 0) {
  829. gf_log (this->name, GF_LOG_ERROR,
  830. "xdr_sizeof on gfs3_read_rsp failed");
  831. ret = -1;
  832. goto out;
  833. }
  834. __socket_proto_init_pending (priv, gluster_read_rsp_hdr_len);
  835. priv->incoming.frag.call_body.reply.accepted_success_state
  836. = SP_STATE_READING_PROC_HEADER;
  837. /* fall through */
  838. case SP_STATE_READING_PROC_HEADER:
  839. __socket_proto_read (priv, ret);
  840. priv->incoming.frag.call_body.reply.accepted_success_state
  841. = SP_STATE_READ_PROC_HEADER;
  842. if (priv->incoming.payload_vector.iov_base == NULL) {
  843. size = (RPC_FRAGSIZE (priv->incoming.fraghdr) -
  844. priv->incoming.frag.bytes_read);
  845. iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
  846. if (iobuf == NULL) {
  847. ret = -1;
  848. goto out;
  849. }
  850. if (priv->incoming.iobref == NULL) {
  851. priv->incoming.iobref = iobref_new ();
  852. if (priv->incoming.iobref == NULL) {
  853. ret = -1;
  854. iobuf_unref (iobuf);
  855. goto out;
  856. }
  857. }
  858. iobref_add (priv->incoming.iobref, iobuf);
  859. iobuf_unref (iobuf);
  860. priv->incoming.payload_vector.iov_base
  861. = iobuf_ptr (iobuf);
  862. priv->incoming.payload_vector.iov_len = size;
  863. }
  864. priv->incoming.frag.fragcurrent
  865. = priv->incoming.payload_vector.iov_base;
  866. /* fall through */
  867. case SP_STATE_READ_PROC_HEADER:
  868. /* now read the entire remaining msg into new iobuf */
  869. ret = __socket_read_simple_msg (this);
  870. if ((ret == -1)
  871. || ((ret == 0)
  872. && RPC_LASTFRAG (priv->incoming.fraghdr))) {
  873. priv->incoming.frag.call_body.reply.accepted_success_state
  874. = SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT;
  875. }
  876. break;
  877. }
  878. out:
  879. return ret;
  880. }
  881. #define rpc_reply_verflen_addr(fragcurrent) ((char *)fragcurrent - 4)
  882. #define rpc_reply_accept_status_addr(fragcurrent) ((char *)fragcurrent - 4)
  883. inline int
  884. __socket_read_accepted_reply (rpc_transport_t *this)
  885. {
  886. socket_private_t *priv = NULL;
  887. int ret = -1;
  888. char *buf = NULL;
  889. uint32_t verflen = 0, len = 0;
  890. uint32_t remaining_size = 0;
  891. GF_VALIDATE_OR_GOTO ("socket", this, out);
  892. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  893. priv = this->private;
  894. switch (priv->incoming.frag.call_body.reply.accepted_state) {
  895. case SP_STATE_ACCEPTED_REPLY_INIT:
  896. __socket_proto_init_pending (priv,
  897. RPC_AUTH_FLAVOUR_N_LENGTH_SIZE);
  898. priv->incoming.frag.call_body.reply.accepted_state
  899. = SP_STATE_READING_REPLY_VERFLEN;
  900. /* fall through */
  901. case SP_STATE_READING_REPLY_VERFLEN:
  902. __socket_proto_read (priv, ret);
  903. priv->incoming.frag.call_body.reply.accepted_state
  904. = SP_STATE_READ_REPLY_VERFLEN;
  905. /* fall through */
  906. case SP_STATE_READ_REPLY_VERFLEN:
  907. buf = rpc_reply_verflen_addr (priv->incoming.frag.fragcurrent);
  908. verflen = ntoh32 (*((uint32_t *) buf));
  909. /* also read accept status along with verf data */
  910. len = verflen + RPC_ACCEPT_STATUS_LEN;
  911. __socket_proto_init_pending (priv, len);
  912. priv->incoming.frag.call_body.reply.accepted_state
  913. = SP_STATE_READING_REPLY_VERFBYTES;
  914. /* fall through */
  915. case SP_STATE_READING_REPLY_VERFBYTES:
  916. __socket_proto_read (priv, ret);
  917. priv->incoming.frag.call_body.reply.accepted_state
  918. = SP_STATE_READ_REPLY_VERFBYTES;
  919. buf = rpc_reply_accept_status_addr (priv->incoming.frag.fragcurrent);
  920. priv->incoming.frag.call_body.reply.accept_status
  921. = ntoh32 (*(uint32_t *) buf);
  922. /* fall through */
  923. case SP_STATE_READ_REPLY_VERFBYTES:
  924. if (priv->incoming.frag.call_body.reply.accept_status
  925. == SUCCESS) {
  926. ret = __socket_read_accepted_successful_reply (this);
  927. } else {
  928. /* read entire remaining msg into buffer pointed to by
  929. * fragcurrent
  930. */
  931. ret = __socket_read_simple_msg (this);
  932. }
  933. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  934. - priv->incoming.frag.bytes_read;
  935. if ((ret == -1)
  936. || ((ret == 0)
  937. && (remaining_size == 0)
  938. && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
  939. priv->incoming.frag.call_body.reply.accepted_state
  940. = SP_STATE_ACCEPTED_REPLY_INIT;
  941. }
  942. break;
  943. }
  944. out:
  945. return ret;
  946. }
  947. inline int
  948. __socket_read_denied_reply (rpc_transport_t *this)
  949. {
  950. return __socket_read_simple_msg (this);
  951. }
  952. #define rpc_reply_status_addr(fragcurrent) ((char *)fragcurrent - 4)
  953. inline int
  954. __socket_read_vectored_reply (rpc_transport_t *this)
  955. {
  956. socket_private_t *priv = NULL;
  957. int ret = 0;
  958. char *buf = NULL;
  959. uint32_t remaining_size = 0;
  960. GF_VALIDATE_OR_GOTO ("socket", this, out);
  961. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  962. priv = this->private;
  963. switch (priv->incoming.frag.call_body.reply.status_state) {
  964. case SP_STATE_ACCEPTED_REPLY_INIT:
  965. __socket_proto_init_pending (priv, RPC_REPLY_STATUS_SIZE);
  966. priv->incoming.frag.call_body.reply.status_state
  967. = SP_STATE_READING_REPLY_STATUS;
  968. /* fall through */
  969. case SP_STATE_READING_REPLY_STATUS:
  970. __socket_proto_read (priv, ret);
  971. buf = rpc_reply_status_addr (priv->incoming.frag.fragcurrent);
  972. priv->incoming.frag.call_body.reply.accept_status
  973. = ntoh32 (*((uint32_t *) buf));
  974. priv->incoming.frag.call_body.reply.status_state
  975. = SP_STATE_READ_REPLY_STATUS;
  976. /* fall through */
  977. case SP_STATE_READ_REPLY_STATUS:
  978. if (priv->incoming.frag.call_body.reply.accept_status
  979. == MSG_ACCEPTED) {
  980. ret = __socket_read_accepted_reply (this);
  981. } else {
  982. ret = __socket_read_denied_reply (this);
  983. }
  984. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  985. - priv->incoming.frag.bytes_read;
  986. if ((ret == -1)
  987. || ((ret == 0)
  988. && (remaining_size == 0)
  989. && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
  990. priv->incoming.frag.call_body.reply.status_state
  991. = SP_STATE_ACCEPTED_REPLY_INIT;
  992. priv->incoming.payload_vector.iov_len
  993. = (unsigned long)priv->incoming.frag.fragcurrent
  994. - (unsigned long)
  995. priv->incoming.payload_vector.iov_base;
  996. }
  997. break;
  998. }
  999. out:
  1000. return ret;
  1001. }
  1002. inline int
  1003. __socket_read_simple_reply (rpc_transport_t *this)
  1004. {
  1005. return __socket_read_simple_msg (this);
  1006. }
  1007. #define rpc_xid_addr(buf) (buf)
  1008. inline int
  1009. __socket_read_reply (rpc_transport_t *this)
  1010. {
  1011. socket_private_t *priv = NULL;
  1012. char *buf = NULL;
  1013. int32_t ret = -1;
  1014. rpc_request_info_t *request_info = NULL;
  1015. char map_xid = 0;
  1016. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1017. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1018. priv = this->private;
  1019. buf = rpc_xid_addr (iobuf_ptr (priv->incoming.iobuf));
  1020. if (priv->incoming.request_info == NULL) {
  1021. priv->incoming.request_info = GF_CALLOC (1,
  1022. sizeof (*request_info),
  1023. gf_common_mt_rpc_trans_reqinfo_t);
  1024. if (priv->incoming.request_info == NULL) {
  1025. goto out;
  1026. }
  1027. map_xid = 1;
  1028. }
  1029. request_info = priv->incoming.request_info;
  1030. if (map_xid) {
  1031. request_info->xid = ntoh32 (*((uint32_t *) buf));
  1032. /* release priv->lock, so as to avoid deadlock b/w conn->lock
  1033. * and priv->lock, since we are doing an upcall here.
  1034. */
  1035. pthread_mutex_unlock (&priv->lock);
  1036. {
  1037. ret = rpc_transport_notify (this,
  1038. RPC_TRANSPORT_MAP_XID_REQUEST,
  1039. priv->incoming.request_info);
  1040. }
  1041. pthread_mutex_lock (&priv->lock);
  1042. if (ret == -1) {
  1043. gf_log (this->name, GF_LOG_WARNING,
  1044. "notify for event MAP_XID failed");
  1045. goto out;
  1046. }
  1047. }
  1048. if ((request_info->prognum == GLUSTER3_1_FOP_PROGRAM)
  1049. && (request_info->procnum == GF_FOP_READ)) {
  1050. if (map_xid && request_info->rsp.rsp_payload_count != 0) {
  1051. priv->incoming.iobref
  1052. = iobref_ref (request_info->rsp.rsp_iobref);
  1053. priv->incoming.payload_vector
  1054. = *request_info->rsp.rsp_payload;
  1055. }
  1056. ret = __socket_read_vectored_reply (this);
  1057. } else {
  1058. ret = __socket_read_simple_reply (this);
  1059. }
  1060. out:
  1061. return ret;
  1062. }
  1063. /* returns the number of bytes yet to be read in a fragment */
  1064. inline int
  1065. __socket_read_frag (rpc_transport_t *this)
  1066. {
  1067. socket_private_t *priv = NULL;
  1068. int32_t ret = 0;
  1069. char *buf = NULL;
  1070. uint32_t remaining_size = 0;
  1071. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1072. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1073. priv = this->private;
  1074. switch (priv->incoming.frag.state) {
  1075. case SP_STATE_NADA:
  1076. __socket_proto_init_pending (priv, RPC_MSGTYPE_SIZE);
  1077. priv->incoming.frag.state = SP_STATE_READING_MSGTYPE;
  1078. /* fall through */
  1079. case SP_STATE_READING_MSGTYPE:
  1080. __socket_proto_read (priv, ret);
  1081. priv->incoming.frag.state = SP_STATE_READ_MSGTYPE;
  1082. /* fall through */
  1083. case SP_STATE_READ_MSGTYPE:
  1084. buf = rpc_msgtype_addr (iobuf_ptr (priv->incoming.iobuf));
  1085. priv->incoming.msg_type = ntoh32 (*((uint32_t *)buf));
  1086. if (priv->incoming.msg_type == CALL) {
  1087. ret = __socket_read_request (this);
  1088. } else if (priv->incoming.msg_type == REPLY) {
  1089. ret = __socket_read_reply (this);
  1090. } else if (priv->incoming.msg_type == GF_UNIVERSAL_ANSWER) {
  1091. gf_log ("rpc", GF_LOG_ERROR,
  1092. "older version of protocol/process trying to "
  1093. "connect from %s. use newer version on that node",
  1094. this->peerinfo.identifier);
  1095. } else {
  1096. gf_log ("rpc", GF_LOG_ERROR,
  1097. "wrong MSG-TYPE (%d) received from %s",
  1098. priv->incoming.msg_type,
  1099. this->peerinfo.identifier);
  1100. ret = -1;
  1101. }
  1102. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  1103. - priv->incoming.frag.bytes_read;
  1104. if ((ret == -1)
  1105. || ((ret == 0)
  1106. && (remaining_size == 0)
  1107. && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
  1108. priv->incoming.frag.state = SP_STATE_NADA;
  1109. }
  1110. break;
  1111. }
  1112. out:
  1113. return ret;
  1114. }
  1115. inline
  1116. void __socket_reset_priv (socket_private_t *priv)
  1117. {
  1118. if (priv->incoming.iobref) {
  1119. iobref_unref (priv->incoming.iobref);
  1120. priv->incoming.iobref = NULL;
  1121. }
  1122. if (priv->incoming.iobuf) {
  1123. iobuf_unref (priv->incoming.iobuf);
  1124. }
  1125. if (priv->incoming.request_info != NULL) {
  1126. GF_FREE (priv->incoming.request_info);
  1127. priv->incoming.request_info = NULL;
  1128. }
  1129. memset (&priv->incoming.payload_vector, 0,
  1130. sizeof (priv->incoming.payload_vector));
  1131. priv->incoming.iobuf = NULL;
  1132. }
  1133. int
  1134. __socket_proto_state_machine (rpc_transport_t *this,
  1135. rpc_transport_pollin_t **pollin)
  1136. {
  1137. int ret = -1;
  1138. socket_private_t *priv = NULL;
  1139. struct iobuf *iobuf = NULL;
  1140. struct iobref *iobref = NULL;
  1141. struct iovec vector[2];
  1142. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1143. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1144. priv = this->private;
  1145. while (priv->incoming.record_state != SP_STATE_COMPLETE) {
  1146. switch (priv->incoming.record_state) {
  1147. case SP_STATE_NADA:
  1148. priv->incoming.total_bytes_read = 0;
  1149. priv->incoming.payload_vector.iov_len = 0;
  1150. priv->incoming.pending_vector = priv->incoming.vector;
  1151. priv->incoming.pending_vector->iov_base =
  1152. &priv->incoming.fraghdr;
  1153. priv->incoming.pending_vector->iov_len =
  1154. sizeof (priv->incoming.fraghdr);
  1155. priv->incoming.record_state = SP_STATE_READING_FRAGHDR;
  1156. /* fall through */
  1157. case SP_STATE_READING_FRAGHDR:
  1158. ret = __socket_readv (this,
  1159. priv->incoming.pending_vector, 1,
  1160. &priv->incoming.pending_vector,
  1161. &priv->incoming.pending_count,
  1162. NULL);
  1163. if (ret == -1) {
  1164. if (priv->read_fail_log == 1) {
  1165. gf_log (this->name,
  1166. ((priv->connected == 1) ?
  1167. GF_LOG_WARNING : GF_LOG_DEBUG),
  1168. "reading from socket failed. Error (%s)"
  1169. ", peer (%s)", strerror (errno),
  1170. this->peerinfo.identifier);
  1171. }
  1172. goto out;
  1173. }
  1174. if (ret > 0) {
  1175. gf_log (this->name, GF_LOG_TRACE, "partial "
  1176. "fragment header read");
  1177. goto out;
  1178. }
  1179. if (ret == 0) {
  1180. priv->incoming.record_state =
  1181. SP_STATE_READ_FRAGHDR;
  1182. }
  1183. /* fall through */
  1184. case SP_STATE_READ_FRAGHDR:
  1185. priv->incoming.fraghdr = ntoh32 (priv->incoming.fraghdr);
  1186. priv->incoming.record_state = SP_STATE_READING_FRAG;
  1187. priv->incoming.total_bytes_read
  1188. += RPC_FRAGSIZE(priv->incoming.fraghdr);
  1189. iobuf = iobuf_get2 (this->ctx->iobuf_pool,
  1190. priv->incoming.total_bytes_read +
  1191. sizeof (priv->incoming.fraghdr));
  1192. if (!iobuf) {
  1193. ret = -ENOMEM;
  1194. goto out;
  1195. }
  1196. priv->incoming.iobuf = iobuf;
  1197. priv->incoming.iobuf_size = 0;
  1198. priv->incoming.frag.fragcurrent = iobuf_ptr (iobuf);
  1199. /* fall through */
  1200. case SP_STATE_READING_FRAG:
  1201. ret = __socket_read_frag (this);
  1202. if ((ret == -1)
  1203. || (priv->incoming.frag.bytes_read !=
  1204. RPC_FRAGSIZE (priv->incoming.fraghdr))) {
  1205. goto out;
  1206. }
  1207. priv->incoming.frag.bytes_read = 0;
  1208. if (!RPC_LASTFRAG (priv->incoming.fraghdr)) {
  1209. priv->incoming.record_state =
  1210. SP_STATE_READING_FRAGHDR;
  1211. break;
  1212. }
  1213. /* we've read the entire rpc record, notify the
  1214. * upper layers.
  1215. */
  1216. if (pollin != NULL) {
  1217. int count = 0;
  1218. priv->incoming.iobuf_size
  1219. = priv->incoming.total_bytes_read
  1220. - priv->incoming.payload_vector.iov_len;
  1221. memset (vector, 0, sizeof (vector));
  1222. if (priv->incoming.iobref == NULL) {
  1223. priv->incoming.iobref = iobref_new ();
  1224. if (priv->incoming.iobref == NULL) {
  1225. ret = -1;
  1226. goto out;
  1227. }
  1228. }
  1229. vector[count].iov_base
  1230. = iobuf_ptr (priv->incoming.iobuf);
  1231. vector[count].iov_len
  1232. = priv->incoming.iobuf_size;
  1233. iobref = priv->incoming.iobref;
  1234. count++;
  1235. if (priv->incoming.payload_vector.iov_base
  1236. != NULL) {
  1237. vector[count]
  1238. = priv->incoming.payload_vector;
  1239. count++;
  1240. }
  1241. *pollin = rpc_transport_pollin_alloc (this,
  1242. vector,
  1243. count,
  1244. priv->incoming.iobuf,
  1245. iobref,
  1246. priv->incoming.request_info);
  1247. iobuf_unref (priv->incoming.iobuf);
  1248. priv->incoming.iobuf = NULL;
  1249. if (*pollin == NULL) {
  1250. gf_log (this->name, GF_LOG_WARNING,
  1251. "transport pollin allocation failed");
  1252. ret = -1;
  1253. goto out;
  1254. }
  1255. if (priv->incoming.msg_type == REPLY)
  1256. (*pollin)->is_reply = 1;
  1257. priv->incoming.request_info = NULL;
  1258. }
  1259. priv->incoming.record_state = SP_STATE_COMPLETE;
  1260. break;
  1261. case SP_STATE_COMPLETE:
  1262. /* control should not reach here */
  1263. gf_log (this->name, GF_LOG_WARNING, "control reached to "
  1264. "SP_STATE_COMPLETE, which should not have "
  1265. "happened");
  1266. break;
  1267. }
  1268. }
  1269. if (priv->incoming.record_state == SP_STATE_COMPLETE) {
  1270. priv->incoming.record_state = SP_STATE_NADA;
  1271. __socket_reset_priv (priv);
  1272. }
  1273. out:
  1274. if ((ret == -1) && (errno == EAGAIN)) {
  1275. ret = 0;
  1276. }
  1277. return ret;
  1278. }
  1279. int
  1280. socket_proto_state_machine (rpc_transport_t *this,
  1281. rpc_transport_pollin_t **pollin)
  1282. {
  1283. socket_private_t *priv = NULL;
  1284. int ret = 0;
  1285. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1286. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1287. priv = this->private;
  1288. pthread_mutex_lock (&priv->lock);
  1289. {
  1290. ret = __socket_proto_state_machine (this, pollin);
  1291. }
  1292. pthread_mutex_unlock (&priv->lock);
  1293. out:
  1294. return ret;
  1295. }
  1296. int
  1297. socket_event_poll_in (rpc_transport_t *this)
  1298. {
  1299. int ret = -1;
  1300. rpc_transport_pollin_t *pollin = NULL;
  1301. ret = socket_proto_state_machine (this, &pollin);
  1302. if (pollin != NULL) {
  1303. ret = rpc_transport_notify (this, RPC_TRANSPORT_MSG_RECEIVED,
  1304. pollin);
  1305. rpc_transport_pollin_destroy (pollin);
  1306. }
  1307. return ret;
  1308. }
  1309. int
  1310. socket_connect_finish (rpc_transport_t *this)
  1311. {
  1312. int ret = -1;
  1313. socket_private_t *priv = NULL;
  1314. rpc_transport_event_t event = 0;
  1315. char notify_rpc = 0;
  1316. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1317. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1318. priv = this->private;
  1319. pthread_mutex_lock (&priv->lock);
  1320. {
  1321. if (priv->connected)
  1322. goto unlock;
  1323. ret = __socket_connect_finish (priv->sock);
  1324. if (ret == -1 && errno == EINPROGRESS)
  1325. ret = 1;
  1326. if (ret == -1 && errno != EINPROGRESS) {
  1327. if (!priv->connect_finish_log) {
  1328. gf_log (this->name, GF_LOG_ERROR,
  1329. "connection to %s failed (%s)",
  1330. this->peerinfo.identifier,
  1331. strerror (errno));
  1332. priv->connect_finish_log = 1;
  1333. }
  1334. __socket_disconnect (this);
  1335. notify_rpc = 1;
  1336. event = RPC_TRANSPORT_DISCONNECT;
  1337. goto unlock;
  1338. }
  1339. if (ret == 0) {
  1340. notify_rpc = 1;
  1341. this->myinfo.sockaddr_len =
  1342. sizeof (this->myinfo.sockaddr);
  1343. ret = getsockname (priv->sock,
  1344. SA (&this->myinfo.sockaddr),
  1345. &this->myinfo.sockaddr_len);
  1346. if (ret == -1) {
  1347. gf_log (this->name, GF_LOG_WARNING,
  1348. "getsockname on (%d) failed (%s)",
  1349. priv->sock, strerror (errno));
  1350. __socket_disconnect (this);
  1351. event = GF_EVENT_POLLERR;
  1352. goto unlock;
  1353. }
  1354. priv->connected = 1;
  1355. priv->connect_finish_log = 0;
  1356. event = RPC_TRANSPORT_CONNECT;
  1357. get_transport_identifiers (this);
  1358. }
  1359. }
  1360. unlock:
  1361. pthread_mutex_unlock (&priv->lock);
  1362. if (notify_rpc) {
  1363. rpc_transport_notify (this, event, this);
  1364. }
  1365. out:
  1366. return 0;
  1367. }
  1368. /* reads rpc_requests during pollin */
  1369. int
  1370. socket_event_handler (int fd, int idx, void *data,
  1371. int poll_in, int poll_out, int poll_err)
  1372. {
  1373. rpc_transport_t *this = NULL;
  1374. socket_private_t *priv = NULL;
  1375. int ret = 0;
  1376. this = data;
  1377. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1378. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1379. GF_VALIDATE_OR_GOTO ("socket", this->xl, out);
  1380. THIS = this->xl;
  1381. priv = this->private;
  1382. pthread_mutex_lock (&priv->lock);
  1383. {
  1384. priv->idx = idx;
  1385. }
  1386. pthread_mutex_unlock (&priv->lock);
  1387. if (!priv->connected) {
  1388. ret = socket_connect_finish (this);
  1389. }
  1390. if (!ret && poll_out) {
  1391. ret = socket_event_poll_out (this);
  1392. }
  1393. if (!ret && poll_in) {
  1394. ret = socket_event_poll_in (this);
  1395. }
  1396. if ((ret < 0) || poll_err) {
  1397. /* Logging has happened already in earlier cases */
  1398. gf_log ("transport", ((ret >= 0) ? GF_LOG_INFO : GF_LOG_DEBUG),
  1399. "disconnecting now");
  1400. socket_event_poll_err (this);
  1401. rpc_transport_unref (this);
  1402. }
  1403. out:
  1404. return 0;
  1405. }
  1406. int
  1407. socket_server_event_handler (int fd, int idx, void *data,
  1408. int poll_in, int poll_out, int poll_err)
  1409. {
  1410. rpc_transport_t *this = NULL;
  1411. socket_private_t *priv = NULL;
  1412. int ret = 0;
  1413. int new_sock = -1;
  1414. rpc_transport_t *new_trans = NULL;
  1415. struct sockaddr_storage new_sockaddr = {0, };
  1416. socklen_t addrlen = sizeof (new_sockaddr);
  1417. socket_private_t *new_priv = NULL;
  1418. glusterfs_ctx_t *ctx = NULL;
  1419. this = data;
  1420. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1421. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1422. GF_VALIDATE_OR_GOTO ("socket", this->xl, out);
  1423. THIS = this->xl;
  1424. priv = this->private;
  1425. ctx = this->ctx;
  1426. pthread_mutex_lock (&priv->lock);
  1427. {
  1428. priv->idx = idx;
  1429. if (poll_in) {
  1430. new_sock = accept (priv->sock, SA (&new_sockaddr),
  1431. &addrlen);
  1432. if (new_sock == -1) {
  1433. gf_log (this->name, GF_LOG_WARNING,
  1434. "accept on %d failed (%s)",
  1435. priv->sock, strerror (errno));
  1436. goto unlock;
  1437. }
  1438. if (!priv->bio) {
  1439. ret = __socket_nonblock (new_sock);
  1440. if (ret == -1) {
  1441. gf_log (this->name, GF_LOG_WARNING,
  1442. "NBIO on %d failed (%s)",
  1443. new_sock, strerror (errno));
  1444. close (new_sock);
  1445. goto unlock;
  1446. }
  1447. }
  1448. if (priv->nodelay) {
  1449. ret = __socket_nodelay (new_sock);
  1450. if (ret == -1) {
  1451. gf_log (this->name, GF_LOG_WARNING,
  1452. "setsockopt() failed for "
  1453. "NODELAY (%s)",
  1454. strerror (errno));
  1455. }
  1456. }
  1457. if (priv->keepalive) {
  1458. ret = __socket_keepalive (new_sock,
  1459. priv->keepaliveintvl,
  1460. priv->keepaliveidle);
  1461. if (ret == -1)
  1462. gf_log (this->name, GF_LOG_WARNING,
  1463. "Failed to set keep-alive: %s",
  1464. strerror (errno));
  1465. }
  1466. new_trans = GF_CALLOC (1, sizeof (*new_trans),
  1467. gf_common_mt_rpc_trans_t);
  1468. if (!new_trans)
  1469. goto unlock;
  1470. new_trans->name = gf_strdup (this->name);
  1471. memcpy (&new_trans->peerinfo.sockaddr, &new_sockaddr,
  1472. addrlen);
  1473. new_trans->peerinfo.sockaddr_len = addrlen;
  1474. new_trans->myinfo.sockaddr_len =
  1475. sizeof (new_trans->myinfo.sockaddr);
  1476. ret = getsockname (new_sock,
  1477. SA (&new_trans->myinfo.sockaddr),
  1478. &new_trans->myinfo.sockaddr_len);
  1479. if (ret == -1) {
  1480. gf_log (this->name, GF_LOG_WARNING,
  1481. "getsockname on %d failed (%s)",
  1482. new_sock, strerror (errno));
  1483. close (new_sock);
  1484. goto unlock;
  1485. }
  1486. get_transport_identifiers (new_trans);
  1487. socket_init (new_trans);
  1488. new_trans->ops = this->ops;
  1489. new_trans->init = this->init;
  1490. new_trans->fini = this->fini;
  1491. new_trans->ctx = ctx;
  1492. new_trans->xl = this->xl;
  1493. new_trans->mydata = this->mydata;
  1494. new_trans->notify = this->notify;
  1495. new_trans->listener = this;
  1496. new_priv = new_trans->private;
  1497. pthread_mutex_lock (&new_priv->lock);
  1498. {
  1499. new_priv->sock = new_sock;
  1500. new_priv->connected = 1;
  1501. rpc_transport_ref (new_trans);
  1502. new_priv->idx =
  1503. event_register (ctx->event_pool,
  1504. new_sock,
  1505. socket_event_handler,
  1506. new_trans, 1, 0);
  1507. if (new_priv->idx == -1)
  1508. ret = -1;
  1509. }
  1510. pthread_mutex_unlock (&new_priv->lock);
  1511. if (ret == -1) {
  1512. gf_log (this->name, GF_LOG_WARNING,
  1513. "failed to register the socket with event");
  1514. goto unlock;
  1515. }
  1516. ret = rpc_transport_notify (this, RPC_TRANSPORT_ACCEPT,
  1517. new_trans);
  1518. }
  1519. }
  1520. unlock:
  1521. pthread_mutex_unlock (&priv->lock);
  1522. out:
  1523. return ret;
  1524. }
  1525. int
  1526. socket_disconnect (rpc_transport_t *this)
  1527. {
  1528. socket_private_t *priv = NULL;
  1529. int ret = -1;
  1530. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1531. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1532. priv = this->private;
  1533. pthread_mutex_lock (&priv->lock);
  1534. {
  1535. ret = __socket_disconnect (this);
  1536. }
  1537. pthread_mutex_unlock (&priv->lock);
  1538. out:
  1539. return ret;
  1540. }
  1541. int
  1542. socket_connect (rpc_transport_t *this, int port)
  1543. {
  1544. int ret = -1;
  1545. int sock = -1;
  1546. socket_private_t *priv = NULL;
  1547. socklen_t sockaddr_len = 0;
  1548. glusterfs_ctx_t *ctx = NULL;
  1549. sa_family_t sa_family = {0, };
  1550. union gf_sock_union sock_union;
  1551. GF_VALIDATE_OR_GOTO ("socket", this, err);
  1552. GF_VALIDATE_OR_GOTO ("socket", this->private, err);
  1553. priv = this->private;
  1554. ctx = this->ctx;
  1555. if (!priv) {
  1556. gf_log_callingfn (this->name, GF_LOG_WARNING,
  1557. "connect() called on uninitialized transport");
  1558. goto err;
  1559. }
  1560. pthread_mutex_lock (&priv->lock);
  1561. {
  1562. sock = priv->sock;
  1563. }
  1564. pthread_mutex_unlock (&priv->lock);
  1565. if (sock != -1) {
  1566. gf_log_callingfn (this->name, GF_LOG_TRACE,
  1567. "connect () called on transport already connected");
  1568. errno = EINPROGRESS;
  1569. ret = -1;
  1570. goto err;
  1571. }
  1572. ret = socket_client_get_remote_sockaddr (this, &sock_union.sa,
  1573. &sockaddr_len, &sa_family);
  1574. if (ret == -1) {
  1575. /* logged inside client_get_remote_sockaddr */
  1576. goto err;
  1577. }
  1578. if (port > 0) {
  1579. sock_union.sin.sin_port = htons (port);
  1580. }
  1581. pthread_mutex_lock (&priv->lock);
  1582. {
  1583. if (priv->sock != -1) {
  1584. gf_log (this->name, GF_LOG_TRACE,
  1585. "connect() -- already connected");
  1586. goto unlock;
  1587. }
  1588. memcpy (&this->peerinfo.sockaddr, &sock_union.storage,
  1589. sockaddr_len);
  1590. this->peerinfo.sockaddr_len = sockaddr_len;
  1591. priv->sock = socket (sa_family, SOCK_STREAM, 0);
  1592. if (priv->sock == -1) {
  1593. gf_log (this->name, GF_LOG_ERROR,
  1594. "socket creation failed (%s)",
  1595. strerror (errno));
  1596. goto unlock;
  1597. }
  1598. /* Cant help if setting socket options fails. We can continue
  1599. * working nonetheless.
  1600. */
  1601. if (priv->windowsize != 0) {
  1602. if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
  1603. &priv->windowsize,
  1604. sizeof (priv->windowsize)) < 0) {
  1605. gf_log (this->name, GF_LOG_ERROR,
  1606. "setting receive window "
  1607. "size failed: %d: %d: %s",
  1608. priv->sock, priv->windowsize,
  1609. strerror (errno));
  1610. }
  1611. if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
  1612. &priv->windowsize,
  1613. sizeof (priv->windowsize)) < 0) {
  1614. gf_log (this->name, GF_LOG_ERROR,
  1615. "setting send window size "
  1616. "failed: %d: %d: %s",
  1617. priv->sock, priv->windowsize,
  1618. strerror (errno));
  1619. }
  1620. }
  1621. if (!priv->bio) {
  1622. ret = __socket_nonblock (priv->sock);
  1623. if (ret == -1) {
  1624. gf_log (this->name, GF_LOG_ERROR,
  1625. "NBIO on %d failed (%s)",
  1626. priv->sock, strerror (errno));
  1627. close (priv->sock);
  1628. priv->sock = -1;
  1629. goto unlock;
  1630. }
  1631. }
  1632. if (priv->keepalive) {
  1633. ret = __socket_keepalive (priv->sock,
  1634. priv->keepaliveintvl,
  1635. priv->keepaliveidle);
  1636. if (ret == -1)
  1637. gf_log (this->name, GF_LOG_ERROR,
  1638. "Failed to set keep-alive: %s",
  1639. strerror (errno));
  1640. }
  1641. SA (&this->myinfo.sockaddr)->sa_family =
  1642. SA (&this->peerinfo.sockaddr)->sa_family;
  1643. ret = client_bind (this, SA (&this->myinfo.sockaddr),
  1644. &this->myinfo.sockaddr_len, priv->sock);
  1645. if (ret == -1) {
  1646. gf_log (this->name, GF_LOG_WARNING,
  1647. "client bind failed: %s", strerror (errno));
  1648. close (priv->sock);
  1649. priv->sock = -1;
  1650. goto unlock;
  1651. }
  1652. ret = connect (priv->sock, SA (&this->peerinfo.sockaddr),
  1653. this->peerinfo.sockaddr_len);
  1654. if (ret == -1 && ((errno != EINPROGRESS) && (errno != ENOENT))) {
  1655. gf_log (this->name, GF_LOG_ERROR,
  1656. "connection attempt failed (%s)",
  1657. strerror (errno));
  1658. close (priv->sock);
  1659. priv->sock = -1;
  1660. goto unlock;
  1661. }
  1662. priv->connected = 0;
  1663. rpc_transport_ref (this);
  1664. priv->idx = event_register (ctx->event_pool, priv->sock,
  1665. socket_event_handler, this, 1, 1);
  1666. if (priv->idx == -1) {
  1667. gf_log (this->name, GF_LOG_WARNING,
  1668. "failed to register the event");
  1669. ret = -1;
  1670. }
  1671. }
  1672. unlock:
  1673. pthread_mutex_unlock (&priv->lock);
  1674. err:
  1675. return ret;
  1676. }
  1677. int
  1678. socket_listen (rpc_transport_t *this)
  1679. {
  1680. socket_private_t * priv = NULL;
  1681. int ret = -1;
  1682. int sock = -1;
  1683. struct sockaddr_storage sockaddr;
  1684. socklen_t sockaddr_len = 0;
  1685. peer_info_t *myinfo = NULL;
  1686. glusterfs_ctx_t *ctx = NULL;
  1687. sa_family_t sa_family = {0, };
  1688. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1689. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1690. priv = this->private;
  1691. myinfo = &this->myinfo;
  1692. ctx = this->ctx;
  1693. pthread_mutex_lock (&priv->lock);
  1694. {
  1695. sock = priv->sock;
  1696. }
  1697. pthread_mutex_unlock (&priv->lock);
  1698. if (sock != -1) {
  1699. gf_log_callingfn (this->name, GF_LOG_DEBUG,
  1700. "already listening");
  1701. return ret;
  1702. }
  1703. ret = socket_server_get_local_sockaddr (this, SA (&sockaddr),
  1704. &sockaddr_len, &sa_family);
  1705. if (ret == -1) {
  1706. return ret;
  1707. }
  1708. pthread_mutex_lock (&priv->lock);
  1709. {
  1710. if (priv->sock != -1) {
  1711. gf_log (this->name, GF_LOG_DEBUG,
  1712. "already listening");
  1713. goto unlock;
  1714. }
  1715. memcpy (&myinfo->sockaddr, &sockaddr, sockaddr_len);
  1716. myinfo->sockaddr_len = sockaddr_len;
  1717. priv->sock = socket (sa_family, SOCK_STREAM, 0);
  1718. if (priv->sock == -1) {
  1719. gf_log (this->name, GF_LOG_ERROR,
  1720. "socket creation failed (%s)",
  1721. strerror (errno));
  1722. goto unlock;
  1723. }
  1724. /* Cant help if setting socket options fails. We can continue
  1725. * working nonetheless.
  1726. */
  1727. if (priv->windowsize != 0) {
  1728. if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
  1729. &priv->windowsize,
  1730. sizeof (priv->windowsize)) < 0) {
  1731. gf_log (this->name, GF_LOG_ERROR,
  1732. "setting receive window size "
  1733. "failed: %d: %d: %s", priv->sock,
  1734. priv->windowsize,
  1735. strerror (errno));
  1736. }
  1737. if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
  1738. &priv->windowsize,
  1739. sizeof (priv->windowsize)) < 0) {
  1740. gf_log (this->name, GF_LOG_ERROR,
  1741. "setting send window size failed:"
  1742. " %d: %d: %s", priv->sock,
  1743. priv->windowsize,
  1744. strerror (errno));
  1745. }
  1746. }
  1747. if (priv->nodelay) {
  1748. ret = __socket_nodelay (priv->sock);
  1749. if (ret == -1) {
  1750. gf_log (this->name, GF_LOG_ERROR,
  1751. "setsockopt() failed for NODELAY (%s)",
  1752. strerror (errno));
  1753. }
  1754. }
  1755. if (!priv->bio) {
  1756. ret = __socket_nonblock (priv->sock);
  1757. if (ret == -1) {
  1758. gf_log (this->name, GF_LOG_ERROR,
  1759. "NBIO on %d failed (%s)",
  1760. priv->sock, strerror (errno));
  1761. close (priv->sock);
  1762. priv->sock = -1;
  1763. goto unlock;
  1764. }
  1765. }
  1766. ret = __socket_server_bind (this);
  1767. if (ret == -1) {
  1768. /* logged inside __socket_server_bind() */
  1769. close (priv->sock);
  1770. priv->sock = -1;
  1771. goto unlock;
  1772. }
  1773. if (priv->backlog)
  1774. ret = listen (priv->sock, priv->backlog);
  1775. else
  1776. ret = listen (priv->sock, 10);
  1777. if (ret == -1) {
  1778. gf_log (this->name, GF_LOG_ERROR,
  1779. "could not set socket %d to listen mode (%s)",
  1780. priv->sock, strerror (errno));
  1781. close (priv->sock);
  1782. priv->sock = -1;
  1783. goto unlock;
  1784. }
  1785. rpc_transport_ref (this);
  1786. priv->idx = event_register (ctx->event_pool, priv->sock,
  1787. socket_server_event_handler,
  1788. this, 1, 0);
  1789. if (priv->idx == -1) {
  1790. gf_log (this->name, GF_LOG_WARNING,
  1791. "could not register socket %d with events",
  1792. priv->sock);
  1793. ret = -1;
  1794. close (priv->sock);
  1795. priv->sock = -1;
  1796. goto unlock;
  1797. }
  1798. }
  1799. unlock:
  1800. pthread_mutex_unlock (&priv->lock);
  1801. out:
  1802. return ret;
  1803. }
  1804. int32_t
  1805. socket_submit_request (rpc_transport_t *this, rpc_transport_req_t *req)
  1806. {
  1807. socket_private_t *priv = NULL;
  1808. int ret = -1;
  1809. char need_poll_out = 0;
  1810. char need_append = 1;
  1811. struct ioq *entry = NULL;
  1812. glusterfs_ctx_t *ctx = NULL;
  1813. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1814. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1815. priv = this->private;
  1816. ctx = this->ctx;
  1817. pthread_mutex_lock (&priv->lock);
  1818. {
  1819. if (priv->connected != 1) {
  1820. if (!priv->submit_log && !priv->connect_finish_log) {
  1821. gf_log (this->name, GF_LOG_INFO,
  1822. "not connected (priv->connected = %d)",
  1823. priv->connected);
  1824. priv->submit_log = 1;
  1825. }
  1826. goto unlock;
  1827. }
  1828. priv->submit_log = 0;
  1829. entry = __socket_ioq_new (this, &req->msg);
  1830. if (!entry)
  1831. goto unlock;
  1832. if (list_empty (&priv->ioq)) {
  1833. ret = __socket_ioq_churn_entry (this, entry);
  1834. if (ret == 0)
  1835. need_append = 0;
  1836. if (ret > 0)
  1837. need_poll_out = 1;
  1838. }
  1839. if (need_append) {
  1840. list_add_tail (&entry->list, &priv->ioq);
  1841. ret = 0;
  1842. }
  1843. if (need_poll_out) {
  1844. /* first entry to wait. continue writing on POLLOUT */
  1845. priv->idx = event_select_on (ctx->event_pool,
  1846. priv->sock,
  1847. priv->idx, -1, 1);
  1848. }
  1849. }
  1850. unlock:
  1851. pthread_mutex_unlock (&priv->lock);
  1852. out:
  1853. return ret;
  1854. }
  1855. int32_t
  1856. socket_submit_reply (rpc_transport_t *this, rpc_transport_reply_t *reply)
  1857. {
  1858. socket_private_t *priv = NULL;
  1859. int ret = -1;
  1860. char need_poll_out = 0;
  1861. char need_append = 1;
  1862. struct ioq *entry = NULL;
  1863. glusterfs_ctx_t *ctx = NULL;
  1864. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1865. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1866. priv = this->private;
  1867. ctx = this->ctx;
  1868. pthread_mutex_lock (&priv->lock);
  1869. {
  1870. if (priv->connected != 1) {
  1871. if (!priv->submit_log && !priv->connect_finish_log) {
  1872. gf_log (this->name, GF_LOG_INFO,
  1873. "not connected (priv->connected = %d)",
  1874. priv->connected);
  1875. priv->submit_log = 1;
  1876. }
  1877. goto unlock;
  1878. }
  1879. priv->submit_log = 0;
  1880. entry = __socket_ioq_new (this, &reply->msg);
  1881. if (!entry)
  1882. goto unlock;
  1883. if (list_empty (&priv->ioq)) {
  1884. ret = __socket_ioq_churn_entry (this, entry);
  1885. if (ret == 0)
  1886. need_append = 0;
  1887. if (ret > 0)
  1888. need_poll_out = 1;
  1889. }
  1890. if (need_append) {
  1891. list_add_tail (&entry->list, &priv->ioq);
  1892. ret = 0;
  1893. }
  1894. if (need_poll_out) {
  1895. /* first entry to wait. continue writing on POLLOUT */
  1896. priv->idx = event_select_on (ctx->event_pool,
  1897. priv->sock,
  1898. priv->idx, -1, 1);
  1899. }
  1900. }
  1901. unlock:
  1902. pthread_mutex_unlock (&priv->lock);
  1903. out:
  1904. return ret;
  1905. }
  1906. int32_t
  1907. socket_getpeername (rpc_transport_t *this, char *hostname, int hostlen)
  1908. {
  1909. int32_t ret = -1;
  1910. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1911. GF_VALIDATE_OR_GOTO ("socket", hostname, out);
  1912. if (hostlen < (strlen (this->peerinfo.identifier) + 1)) {
  1913. goto out;
  1914. }
  1915. strcpy (hostname, this->peerinfo.identifier);
  1916. ret = 0;
  1917. out:
  1918. return ret;
  1919. }
  1920. int32_t
  1921. socket_getpeeraddr (rpc_transport_t *this, char *peeraddr, int addrlen,
  1922. struct sockaddr_storage *sa, socklen_t salen)
  1923. {
  1924. int32_t ret = -1;
  1925. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1926. GF_VALIDATE_OR_GOTO ("socket", sa, out);
  1927. *sa = this->peerinfo.sockaddr;
  1928. if (peeraddr != NULL) {
  1929. ret = socket_getpeername (this, peeraddr, addrlen);
  1930. }
  1931. ret = 0;
  1932. out:
  1933. return ret;
  1934. }
  1935. int32_t
  1936. socket_getmyname (rpc_transport_t *this, char *hostname, int hostlen)
  1937. {
  1938. int32_t ret = -1;
  1939. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1940. GF_VALIDATE_OR_GOTO ("socket", hostname, out);
  1941. if (hostlen < (strlen (this->myinfo.identifier) + 1)) {
  1942. goto out;
  1943. }
  1944. strcpy (hostname, this->myinfo.identifier);
  1945. ret = 0;
  1946. out:
  1947. return ret;
  1948. }
  1949. int32_t
  1950. socket_getmyaddr (rpc_transport_t *this, char *myaddr, int addrlen,
  1951. struct sockaddr_storage *sa, socklen_t salen)
  1952. {
  1953. int32_t ret = 0;
  1954. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1955. GF_VALIDATE_OR_GOTO ("socket", sa, out);
  1956. *sa = this->myinfo.sockaddr;
  1957. if (myaddr != NULL) {
  1958. ret = socket_getmyname (this, myaddr, addrlen);
  1959. }
  1960. out:
  1961. return ret;
  1962. }
  1963. struct rpc_transport_ops tops = {
  1964. .listen = socket_listen,
  1965. .connect = socket_connect,
  1966. .disconnect = socket_disconnect,
  1967. .submit_request = socket_submit_request,
  1968. .submit_reply = socket_submit_reply,
  1969. .get_peername = socket_getpeername,
  1970. .get_peeraddr = socket_getpeeraddr,
  1971. .get_myname = socket_getmyname,
  1972. .get_myaddr = socket_getmyaddr,
  1973. };
  1974. int
  1975. reconfigure (rpc_transport_t *this, dict_t *options)
  1976. {
  1977. socket_private_t *priv = NULL;
  1978. gf_boolean_t tmp_bool = _gf_false;
  1979. char *optstr = NULL;
  1980. int ret = 0;
  1981. uint64_t windowsize = 0;
  1982. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1983. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1984. if (!this || !this->private) {
  1985. ret =-1;
  1986. goto out;
  1987. }
  1988. priv = this->private;
  1989. if (dict_get_str (this->options, "transport.socket.keepalive",
  1990. &optstr) == 0) {
  1991. if (gf_string2boolean (optstr, &tmp_bool) == -1) {
  1992. gf_log (this->name, GF_LOG_ERROR,
  1993. "'transport.socket.keepalive' takes only "
  1994. "boolean options, not taking any action");
  1995. priv->keepalive = 1;
  1996. ret = -1;
  1997. goto out;
  1998. }
  1999. gf_log (this->name, GF_LOG_DEBUG, "Reconfigured transport.socket.keepalive");
  2000. priv->keepalive = tmp_bool;
  2001. }
  2002. else
  2003. priv->keepalive = 1;
  2004. optstr = NULL;
  2005. if (dict_get_str (this->options, "tcp-window-size",
  2006. &optstr) == 0) {
  2007. if (gf_string2bytesize (optstr, &windowsize) != 0) {
  2008. gf_log (this->name, GF_LOG_ERROR,
  2009. "invalid number format: %s", optstr);
  2010. goto out;
  2011. }
  2012. }
  2013. priv->windowsize = (int)windowsize;
  2014. ret = 0;
  2015. out:
  2016. return ret;
  2017. }
  2018. int
  2019. socket_init (rpc_transport_t *this)
  2020. {
  2021. socket_private_t *priv = NULL;
  2022. gf_boolean_t tmp_bool = 0;
  2023. uint64_t windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
  2024. char *optstr = NULL;
  2025. uint32_t keepalive = 0;
  2026. uint32_t backlog = 0;
  2027. if (this->private) {
  2028. gf_log_callingfn (this->name, GF_LOG_ERROR,
  2029. "double init attempted");
  2030. return -1;
  2031. }
  2032. priv = GF_CALLOC (1, sizeof (*priv), gf_common_mt_socket_private_t);
  2033. if (!priv) {
  2034. return -1;
  2035. }
  2036. pthread_mutex_init (&priv->lock, NULL);
  2037. priv->sock = -1;
  2038. priv->idx = -1;
  2039. priv->connected = -1;
  2040. priv->nodelay = 1;
  2041. priv->bio = 0;
  2042. priv->windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
  2043. INIT_LIST_HEAD (&priv->ioq);
  2044. /* All the below section needs 'this->options' to be present */
  2045. if (!this->options)
  2046. goto out;
  2047. if (dict_get (this->options, "non-blocking-io")) {
  2048. optstr = data_to_str (dict_get (this->options,
  2049. "non-blocking-io"));
  2050. if (gf_string2boolean (optstr, &tmp_bool) == -1) {
  2051. gf_log (this->name, GF_LOG_ERROR,
  2052. "'non-blocking-io' takes only boolean options,"
  2053. " not taking any action");
  2054. tmp_bool = 1;
  2055. }
  2056. if (!tmp_bool) {
  2057. priv->bio = 1;
  2058. gf_log (this->name, GF_LOG_WARNING,
  2059. "disabling non-blocking IO");
  2060. }
  2061. }
  2062. optstr = NULL;
  2063. // By default, we enable NODELAY
  2064. if (dict_get (this->options, "transport.socket.nodelay")) {
  2065. optstr = data_to_str (dict_get (this->options,
  2066. "transport.socket.nodelay"));
  2067. if (gf_string2boolean (optstr, &tmp_bool) == -1) {
  2068. gf_log (this->name, GF_LOG_ERROR,
  2069. "'transport.socket.nodelay' takes only "
  2070. "boolean options, not taking any action");
  2071. tmp_bool = 1;
  2072. }
  2073. if (!tmp_bool) {
  2074. priv->nodelay = 0;
  2075. gf_log (this->name, GF_LOG_DEBUG,
  2076. "disabling nodelay");
  2077. }
  2078. }
  2079. optstr = NULL;
  2080. if (dict_get_str (this->options, "tcp-window-size",
  2081. &optstr) == 0) {
  2082. if (gf_string2bytesize (optstr, &windowsize) != 0) {
  2083. gf_log (this->name, GF_LOG_ERROR,
  2084. "invalid number format: %s", optstr);
  2085. return -1;
  2086. }
  2087. }
  2088. priv->windowsize = (int)windowsize;
  2089. optstr = NULL;
  2090. /* Enable Keep-alive by default. */
  2091. priv->keepalive = 1;
  2092. priv->keepaliveintvl = 2;
  2093. priv->keepaliveidle = 20;
  2094. if (dict_get_str (this->options, "transport.socket.keepalive",
  2095. &optstr) == 0) {
  2096. if (gf_string2boolean (optstr, &tmp_bool) == -1) {
  2097. gf_log (this->name, GF_LOG_ERROR,
  2098. "'transport.socket.keepalive' takes only "
  2099. "boolean options, not taking any action");
  2100. tmp_bool = 1;
  2101. }
  2102. if (!tmp_bool)
  2103. priv->keepalive = 0;
  2104. }
  2105. if (dict_get_uint32 (this->options,
  2106. "transport.socket.keepalive-interval",
  2107. &keepalive) == 0) {
  2108. priv->keepaliveintvl = keepalive;
  2109. }
  2110. if (dict_get_uint32 (this->options,
  2111. "transport.socket.keepalive-time",
  2112. &keepalive) == 0) {
  2113. priv->keepaliveidle = keepalive;
  2114. }
  2115. if (dict_get_uint32 (this->options,
  2116. "transport.socket.listen-backlog",
  2117. &backlog) == 0) {
  2118. priv->backlog = backlog;
  2119. }
  2120. optstr = NULL;
  2121. /* Check if socket read failures are to be logged */
  2122. priv->read_fail_log = 1;
  2123. if (dict_get (this->options, "transport.socket.read-fail-log")) {
  2124. optstr = data_to_str (dict_get (this->options, "transport.socket.read-fail-log"));
  2125. if (gf_string2boolean (optstr, &tmp_bool) == -1) {
  2126. gf_log (this->name, GF_LOG_WARNING,
  2127. "'transport.socket.read-fail-log' takes only "
  2128. "boolean options; logging socket read fails");
  2129. }
  2130. else if (tmp_bool == _gf_false) {
  2131. priv->read_fail_log = 0;
  2132. }
  2133. }
  2134. optstr = NULL;
  2135. out:
  2136. this->private = priv;
  2137. return 0;
  2138. }
  2139. void
  2140. fini (rpc_transport_t *this)
  2141. {
  2142. socket_private_t *priv = NULL;
  2143. if (!this)
  2144. return;
  2145. priv = this->private;
  2146. if (priv) {
  2147. if (priv->sock != -1) {
  2148. pthread_mutex_lock (&priv->lock);
  2149. {
  2150. __socket_ioq_flush (this);
  2151. __socket_reset (this);
  2152. }
  2153. pthread_mutex_unlock (&priv->lock);
  2154. }
  2155. gf_log (this->name, GF_LOG_TRACE,
  2156. "transport %p destroyed", this);
  2157. pthread_mutex_destroy (&priv->lock);
  2158. GF_FREE (priv);
  2159. }
  2160. this->private = NULL;
  2161. }
  2162. int32_t
  2163. init (rpc_transport_t *this)
  2164. {
  2165. int ret = -1;
  2166. ret = socket_init (this);
  2167. if (ret == -1) {
  2168. gf_log (this->name, GF_LOG_DEBUG, "socket_init() failed");
  2169. }
  2170. return ret;
  2171. }
  2172. struct volume_options options[] = {
  2173. { .key = {"remote-port",
  2174. "transport.remote-port",
  2175. "transport.socket.remote-port"},
  2176. .type = GF_OPTION_TYPE_INT
  2177. },
  2178. { .key = {"transport.socket.listen-port", "listen-port"},
  2179. .type = GF_OPTION_TYPE_INT
  2180. },
  2181. { .key = {"transport.socket.bind-address", "bind-address" },
  2182. .type = GF_OPTION_TYPE_INTERNET_ADDRESS
  2183. },
  2184. { .key = {"transport.socket.connect-path", "connect-path"},
  2185. .type = GF_OPTION_TYPE_ANY
  2186. },
  2187. { .key = {"transport.socket.bind-path", "bind-path"},
  2188. .type = GF_OPTION_TYPE_ANY
  2189. },
  2190. { .key = {"transport.socket.listen-path", "listen-path"},
  2191. .type = GF_OPTION_TYPE_ANY
  2192. },
  2193. { .key = { "transport.address-family",
  2194. "address-family" },
  2195. .value = {"inet", "inet6", "inet/inet6", "inet6/inet",
  2196. "unix", "inet-sdp" },
  2197. .type = GF_OPTION_TYPE_STR
  2198. },
  2199. { .key = {"non-blocking-io"},
  2200. .type = GF_OPTION_TYPE_BOOL
  2201. },
  2202. { .key = {"tcp-window-size"},
  2203. .type = GF_OPTION_TYPE_SIZET,
  2204. .min = GF_MIN_SOCKET_WINDOW_SIZE,
  2205. .max = GF_MAX_SOCKET_WINDOW_SIZE
  2206. },
  2207. { .key = {"transport.socket.nodelay"},
  2208. .type = GF_OPTION_TYPE_BOOL
  2209. },
  2210. { .key = {"transport.socket.lowlat"},
  2211. .type = GF_OPTION_TYPE_BOOL
  2212. },
  2213. { .key = {"transport.socket.keepalive"},
  2214. .type = GF_OPTION_TYPE_BOOL
  2215. },
  2216. { .key = {"transport.socket.keepalive-interval"},
  2217. .type = GF_OPTION_TYPE_INT
  2218. },
  2219. { .key = {"transport.socket.keepalive-time"},
  2220. .type = GF_OPTION_TYPE_INT
  2221. },
  2222. { .key = {"transport.socket.listen-backlog"},
  2223. .type = GF_OPTION_TYPE_INT
  2224. },
  2225. { .key = {"transport.socket.read-fail-log"},
  2226. .type = GF_OPTION_TYPE_BOOL
  2227. },
  2228. { .key = {NULL} }
  2229. };