PageRenderTime 47ms CodeModel.GetById 8ms RepoModel.GetById 1ms app.codeStats 0ms

/rpc/rpc-transport/socket/src/socket.c

https://github.com/dopry/glusterfs
C | 2808 lines | 2135 code | 576 blank | 97 comment | 425 complexity | b473361e0e485f7043b6ed31936626bf MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.0, GPL-2.0, Apache-2.0, BSD-3-Clause

Large files are truncated, but you can click here to view the full file

  1. /*
  2. Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
  3. This file is part of GlusterFS.
  4. GlusterFS is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published
  6. by the Free Software Foundation; either version 3 of the License,
  7. or (at your option) any later version.
  8. GlusterFS is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program. If not, see
  14. <http://www.gnu.org/licenses/>.
  15. */
  16. #ifndef _CONFIG_H
  17. #define _CONFIG_H
  18. #include "config.h"
  19. #endif
  20. #include "socket.h"
  21. #include "name.h"
  22. #include "dict.h"
  23. #include "rpc-transport.h"
  24. #include "logging.h"
  25. #include "xlator.h"
  26. #include "byte-order.h"
  27. #include "common-utils.h"
  28. #include "compat-errno.h"
  29. /* ugly #includes below */
  30. #include "protocol-common.h"
  31. #include "glusterfs3-xdr.h"
  32. #include "xdr-nfs3.h"
  33. #include "rpcsvc.h"
  34. #include <fcntl.h>
  35. #include <errno.h>
  36. #include <netinet/tcp.h>
  37. #include <rpc/xdr.h>
  38. #define GF_LOG_ERRNO(errno) ((errno == ENOTCONN) ? GF_LOG_DEBUG : GF_LOG_ERROR)
  39. #define SA(ptr) ((struct sockaddr *)ptr)
  40. #define __socket_proto_reset_pending(priv) do { \
  41. memset (&priv->incoming.frag.vector, 0, \
  42. sizeof (priv->incoming.frag.vector)); \
  43. priv->incoming.frag.pending_vector = \
  44. &priv->incoming.frag.vector; \
  45. priv->incoming.frag.pending_vector->iov_base = \
  46. priv->incoming.frag.fragcurrent; \
  47. priv->incoming.pending_vector = \
  48. priv->incoming.frag.pending_vector; \
  49. } while (0);
  50. #define __socket_proto_update_pending(priv) \
  51. do { \
  52. uint32_t remaining_fragsize = 0; \
  53. if (priv->incoming.frag.pending_vector->iov_len == 0) { \
  54. remaining_fragsize = RPC_FRAGSIZE (priv->incoming.fraghdr) \
  55. - priv->incoming.frag.bytes_read; \
  56. \
  57. priv->incoming.frag.pending_vector->iov_len = \
  58. remaining_fragsize > priv->incoming.frag.remaining_size \
  59. ? priv->incoming.frag.remaining_size : remaining_fragsize; \
  60. \
  61. priv->incoming.frag.remaining_size -= \
  62. priv->incoming.frag.pending_vector->iov_len; \
  63. } \
  64. } while (0);
  65. #define __socket_proto_update_priv_after_read(priv, ret, bytes_read) \
  66. { \
  67. priv->incoming.frag.fragcurrent += bytes_read; \
  68. priv->incoming.frag.bytes_read += bytes_read; \
  69. \
  70. if ((ret > 0) || (priv->incoming.frag.remaining_size != 0)) { \
  71. if (priv->incoming.frag.remaining_size != 0 && ret == 0) { \
  72. __socket_proto_reset_pending (priv); \
  73. } \
  74. \
  75. gf_log (this->name, GF_LOG_TRACE, "partial read on non-blocking socket"); \
  76. \
  77. break; \
  78. } \
  79. }
  80. #define __socket_proto_init_pending(priv, size) \
  81. do { \
  82. uint32_t remaining_fragsize = 0; \
  83. remaining_fragsize = RPC_FRAGSIZE (priv->incoming.fraghdr) \
  84. - priv->incoming.frag.bytes_read; \
  85. \
  86. __socket_proto_reset_pending (priv); \
  87. \
  88. priv->incoming.frag.pending_vector->iov_len = \
  89. remaining_fragsize > size ? size : remaining_fragsize; \
  90. \
  91. priv->incoming.frag.remaining_size = \
  92. size - priv->incoming.frag.pending_vector->iov_len; \
  93. \
  94. } while (0);
  95. /* This will be used in a switch case and breaks from the switch case if all
  96. * the pending data is not read.
  97. */
  98. #define __socket_proto_read(priv, ret) \
  99. { \
  100. size_t bytes_read = 0; \
  101. \
  102. __socket_proto_update_pending (priv); \
  103. \
  104. ret = __socket_readv (this, \
  105. priv->incoming.pending_vector, 1, \
  106. &priv->incoming.pending_vector, \
  107. &priv->incoming.pending_count, \
  108. &bytes_read); \
  109. if (ret == -1) { \
  110. gf_log (this->name, GF_LOG_WARNING, \
  111. "reading from socket failed. Error (%s), " \
  112. "peer (%s)", strerror (errno), \
  113. this->peerinfo.identifier); \
  114. break; \
  115. } \
  116. __socket_proto_update_priv_after_read (priv, ret, bytes_read); \
  117. }
  118. int socket_init (rpc_transport_t *this);
  119. /*
  120. * return value:
  121. * 0 = success (completed)
  122. * -1 = error
  123. * > 0 = incomplete
  124. */
  125. int
  126. __socket_rwv (rpc_transport_t *this, struct iovec *vector, int count,
  127. struct iovec **pending_vector, int *pending_count, size_t *bytes,
  128. int write)
  129. {
  130. socket_private_t *priv = NULL;
  131. int sock = -1;
  132. int ret = -1;
  133. struct iovec *opvector = NULL;
  134. int opcount = 0;
  135. int moved = 0;
  136. GF_VALIDATE_OR_GOTO ("socket", this, out);
  137. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  138. priv = this->private;
  139. sock = priv->sock;
  140. opvector = vector;
  141. opcount = count;
  142. if (bytes != NULL) {
  143. *bytes = 0;
  144. }
  145. while (opcount) {
  146. if (write) {
  147. ret = writev (sock, opvector, opcount);
  148. if (ret == 0 || (ret == -1 && errno == EAGAIN)) {
  149. /* done for now */
  150. break;
  151. }
  152. this->total_bytes_write += ret;
  153. } else {
  154. ret = readv (sock, opvector, opcount);
  155. if (ret == -1 && errno == EAGAIN) {
  156. /* done for now */
  157. break;
  158. }
  159. this->total_bytes_read += ret;
  160. }
  161. if (ret == 0) {
  162. /* Mostly due to 'umount' in client */
  163. gf_log (this->name, GF_LOG_DEBUG,
  164. "EOF from peer %s", this->peerinfo.identifier);
  165. opcount = -1;
  166. errno = ENOTCONN;
  167. break;
  168. }
  169. if (ret == -1) {
  170. if (errno == EINTR)
  171. continue;
  172. gf_log (this->name, GF_LOG_WARNING,
  173. "%s failed (%s)", write ? "writev" : "readv",
  174. strerror (errno));
  175. opcount = -1;
  176. break;
  177. }
  178. if (bytes != NULL) {
  179. *bytes += ret;
  180. }
  181. moved = 0;
  182. while (moved < ret) {
  183. if ((ret - moved) >= opvector[0].iov_len) {
  184. moved += opvector[0].iov_len;
  185. opvector++;
  186. opcount--;
  187. } else {
  188. opvector[0].iov_len -= (ret - moved);
  189. opvector[0].iov_base += (ret - moved);
  190. moved += (ret - moved);
  191. }
  192. while (opcount && !opvector[0].iov_len) {
  193. opvector++;
  194. opcount--;
  195. }
  196. }
  197. }
  198. if (pending_vector)
  199. *pending_vector = opvector;
  200. if (pending_count)
  201. *pending_count = opcount;
  202. out:
  203. return opcount;
  204. }
  205. int
  206. __socket_readv (rpc_transport_t *this, struct iovec *vector, int count,
  207. struct iovec **pending_vector, int *pending_count,
  208. size_t *bytes)
  209. {
  210. int ret = -1;
  211. ret = __socket_rwv (this, vector, count,
  212. pending_vector, pending_count, bytes, 0);
  213. return ret;
  214. }
  215. int
  216. __socket_writev (rpc_transport_t *this, struct iovec *vector, int count,
  217. struct iovec **pending_vector, int *pending_count)
  218. {
  219. int ret = -1;
  220. ret = __socket_rwv (this, vector, count,
  221. pending_vector, pending_count, NULL, 1);
  222. return ret;
  223. }
  224. int
  225. __socket_disconnect (rpc_transport_t *this)
  226. {
  227. socket_private_t *priv = NULL;
  228. int ret = -1;
  229. GF_VALIDATE_OR_GOTO ("socket", this, out);
  230. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  231. priv = this->private;
  232. if (priv->sock != -1) {
  233. priv->connected = -1;
  234. ret = shutdown (priv->sock, SHUT_RDWR);
  235. if (ret) {
  236. /* its already disconnected.. no need to understand
  237. why it failed to shutdown in normal cases */
  238. gf_log (this->name, GF_LOG_DEBUG,
  239. "shutdown() returned %d. %s",
  240. ret, strerror (errno));
  241. }
  242. }
  243. out:
  244. return ret;
  245. }
  246. int
  247. __socket_server_bind (rpc_transport_t *this)
  248. {
  249. socket_private_t *priv = NULL;
  250. int ret = -1;
  251. int opt = 1;
  252. int reuse_check_sock = -1;
  253. struct sockaddr_storage unix_addr = {0};
  254. GF_VALIDATE_OR_GOTO ("socket", this, out);
  255. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  256. priv = this->private;
  257. ret = setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR,
  258. &opt, sizeof (opt));
  259. if (ret == -1) {
  260. gf_log (this->name, GF_LOG_ERROR,
  261. "setsockopt() for SO_REUSEADDR failed (%s)",
  262. strerror (errno));
  263. }
  264. /* reuse-address doesn't work for unix type sockets */
  265. if (AF_UNIX == SA (&this->myinfo.sockaddr)->sa_family) {
  266. memcpy (&unix_addr, SA (&this->myinfo.sockaddr),
  267. this->myinfo.sockaddr_len);
  268. reuse_check_sock = socket (AF_UNIX, SOCK_STREAM, 0);
  269. if (reuse_check_sock > 0) {
  270. ret = connect (reuse_check_sock, SA (&unix_addr),
  271. this->myinfo.sockaddr_len);
  272. if ((ret == -1) && (ECONNREFUSED == errno)) {
  273. unlink (((struct sockaddr_un*)&unix_addr)->sun_path);
  274. }
  275. close (reuse_check_sock);
  276. }
  277. }
  278. ret = bind (priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
  279. this->myinfo.sockaddr_len);
  280. if (ret == -1) {
  281. gf_log (this->name, GF_LOG_ERROR,
  282. "binding to %s failed: %s",
  283. this->myinfo.identifier, strerror (errno));
  284. if (errno == EADDRINUSE) {
  285. gf_log (this->name, GF_LOG_ERROR,
  286. "Port is already in use");
  287. }
  288. }
  289. out:
  290. return ret;
  291. }
  292. int
  293. __socket_nonblock (int fd)
  294. {
  295. int flags = 0;
  296. int ret = -1;
  297. flags = fcntl (fd, F_GETFL);
  298. if (flags != -1)
  299. ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
  300. return ret;
  301. }
  302. int
  303. __socket_nodelay (int fd)
  304. {
  305. int on = 1;
  306. int ret = -1;
  307. ret = setsockopt (fd, IPPROTO_TCP, TCP_NODELAY,
  308. &on, sizeof (on));
  309. if (!ret)
  310. gf_log (THIS->name, GF_LOG_TRACE,
  311. "NODELAY enabled for socket %d", fd);
  312. return ret;
  313. }
  314. static int
  315. __socket_keepalive (int fd, int keepalive_intvl, int keepalive_idle)
  316. {
  317. int on = 1;
  318. int ret = -1;
  319. ret = setsockopt (fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof (on));
  320. if (ret == -1) {
  321. gf_log ("socket", GF_LOG_WARNING,
  322. "failed to set keep alive option on socket %d", fd);
  323. goto err;
  324. }
  325. if (keepalive_intvl == GF_USE_DEFAULT_KEEPALIVE)
  326. goto done;
  327. #if !defined(GF_LINUX_HOST_OS) && !defined(__NetBSD__)
  328. #ifdef GF_SOLARIS_HOST_OS
  329. ret = setsockopt (fd, SOL_SOCKET, SO_KEEPALIVE, &keepalive_intvl,
  330. sizeof (keepalive_intvl));
  331. #else
  332. ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepalive_intvl,
  333. sizeof (keepalive_intvl));
  334. #endif
  335. if (ret == -1) {
  336. gf_log ("socket", GF_LOG_WARNING,
  337. "failed to set keep alive interval on socket %d", fd);
  338. goto err;
  339. }
  340. #else
  341. ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepalive_idle,
  342. sizeof (keepalive_intvl));
  343. if (ret == -1) {
  344. gf_log ("socket", GF_LOG_WARNING,
  345. "failed to set keep idle on socket %d", fd);
  346. goto err;
  347. }
  348. ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepalive_intvl,
  349. sizeof (keepalive_intvl));
  350. if (ret == -1) {
  351. gf_log ("socket", GF_LOG_WARNING,
  352. "failed to set keep alive interval on socket %d", fd);
  353. goto err;
  354. }
  355. #endif
  356. done:
  357. gf_log (THIS->name, GF_LOG_TRACE, "Keep-alive enabled for socket %d, interval "
  358. "%d, idle: %d", fd, keepalive_intvl, keepalive_idle);
  359. err:
  360. return ret;
  361. }
  362. int
  363. __socket_connect_finish (int fd)
  364. {
  365. int ret = -1;
  366. int optval = 0;
  367. socklen_t optlen = sizeof (int);
  368. ret = getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&optval, &optlen);
  369. if (ret == 0 && optval) {
  370. errno = optval;
  371. ret = -1;
  372. }
  373. return ret;
  374. }
  375. void
  376. __socket_reset (rpc_transport_t *this)
  377. {
  378. socket_private_t *priv = NULL;
  379. GF_VALIDATE_OR_GOTO ("socket", this, out);
  380. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  381. priv = this->private;
  382. /* TODO: use mem-pool on incoming data */
  383. if (priv->incoming.iobref) {
  384. iobref_unref (priv->incoming.iobref);
  385. priv->incoming.iobref = NULL;
  386. }
  387. if (priv->incoming.iobuf) {
  388. iobuf_unref (priv->incoming.iobuf);
  389. }
  390. if (priv->incoming.request_info != NULL) {
  391. GF_FREE (priv->incoming.request_info);
  392. }
  393. memset (&priv->incoming, 0, sizeof (priv->incoming));
  394. event_unregister (this->ctx->event_pool, priv->sock, priv->idx);
  395. close (priv->sock);
  396. priv->sock = -1;
  397. priv->idx = -1;
  398. priv->connected = -1;
  399. out:
  400. return;
  401. }
  402. void
  403. socket_set_lastfrag (uint32_t *fragsize) {
  404. (*fragsize) |= 0x80000000U;
  405. }
  406. void
  407. socket_set_frag_header_size (uint32_t size, char *haddr)
  408. {
  409. size = htonl (size);
  410. memcpy (haddr, &size, sizeof (size));
  411. }
  412. void
  413. socket_set_last_frag_header_size (uint32_t size, char *haddr)
  414. {
  415. socket_set_lastfrag (&size);
  416. socket_set_frag_header_size (size, haddr);
  417. }
  418. struct ioq *
  419. __socket_ioq_new (rpc_transport_t *this, rpc_transport_msg_t *msg)
  420. {
  421. struct ioq *entry = NULL;
  422. int count = 0;
  423. uint32_t size = 0;
  424. GF_VALIDATE_OR_GOTO ("socket", this, out);
  425. /* TODO: use mem-pool */
  426. entry = GF_CALLOC (1, sizeof (*entry), gf_common_mt_ioq);
  427. if (!entry)
  428. return NULL;
  429. count = msg->rpchdrcount + msg->proghdrcount + msg->progpayloadcount;
  430. GF_ASSERT (count <= (MAX_IOVEC - 1));
  431. size = iov_length (msg->rpchdr, msg->rpchdrcount)
  432. + iov_length (msg->proghdr, msg->proghdrcount)
  433. + iov_length (msg->progpayload, msg->progpayloadcount);
  434. if (size > RPC_MAX_FRAGMENT_SIZE) {
  435. gf_log (this->name, GF_LOG_ERROR,
  436. "msg size (%u) bigger than the maximum allowed size on "
  437. "sockets (%u)", size, RPC_MAX_FRAGMENT_SIZE);
  438. GF_FREE (entry);
  439. return NULL;
  440. }
  441. socket_set_last_frag_header_size (size, (char *)&entry->fraghdr);
  442. entry->vector[0].iov_base = (char *)&entry->fraghdr;
  443. entry->vector[0].iov_len = sizeof (entry->fraghdr);
  444. entry->count = 1;
  445. if (msg->rpchdr != NULL) {
  446. memcpy (&entry->vector[1], msg->rpchdr,
  447. sizeof (struct iovec) * msg->rpchdrcount);
  448. entry->count += msg->rpchdrcount;
  449. }
  450. if (msg->proghdr != NULL) {
  451. memcpy (&entry->vector[entry->count], msg->proghdr,
  452. sizeof (struct iovec) * msg->proghdrcount);
  453. entry->count += msg->proghdrcount;
  454. }
  455. if (msg->progpayload != NULL) {
  456. memcpy (&entry->vector[entry->count], msg->progpayload,
  457. sizeof (struct iovec) * msg->progpayloadcount);
  458. entry->count += msg->progpayloadcount;
  459. }
  460. entry->pending_vector = entry->vector;
  461. entry->pending_count = entry->count;
  462. if (msg->iobref != NULL)
  463. entry->iobref = iobref_ref (msg->iobref);
  464. INIT_LIST_HEAD (&entry->list);
  465. out:
  466. return entry;
  467. }
  468. void
  469. __socket_ioq_entry_free (struct ioq *entry)
  470. {
  471. GF_VALIDATE_OR_GOTO ("socket", entry, out);
  472. list_del_init (&entry->list);
  473. if (entry->iobref)
  474. iobref_unref (entry->iobref);
  475. /* TODO: use mem-pool */
  476. GF_FREE (entry);
  477. out:
  478. return;
  479. }
  480. void
  481. __socket_ioq_flush (rpc_transport_t *this)
  482. {
  483. socket_private_t *priv = NULL;
  484. struct ioq *entry = NULL;
  485. GF_VALIDATE_OR_GOTO ("socket", this, out);
  486. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  487. priv = this->private;
  488. while (!list_empty (&priv->ioq)) {
  489. entry = priv->ioq_next;
  490. __socket_ioq_entry_free (entry);
  491. }
  492. out:
  493. return;
  494. }
  495. int
  496. __socket_ioq_churn_entry (rpc_transport_t *this, struct ioq *entry)
  497. {
  498. int ret = -1;
  499. ret = __socket_writev (this, entry->pending_vector,
  500. entry->pending_count,
  501. &entry->pending_vector,
  502. &entry->pending_count);
  503. if (ret == 0) {
  504. /* current entry was completely written */
  505. GF_ASSERT (entry->pending_count == 0);
  506. __socket_ioq_entry_free (entry);
  507. }
  508. return ret;
  509. }
  510. int
  511. __socket_ioq_churn (rpc_transport_t *this)
  512. {
  513. socket_private_t *priv = NULL;
  514. int ret = 0;
  515. struct ioq *entry = NULL;
  516. GF_VALIDATE_OR_GOTO ("socket", this, out);
  517. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  518. priv = this->private;
  519. while (!list_empty (&priv->ioq)) {
  520. /* pick next entry */
  521. entry = priv->ioq_next;
  522. ret = __socket_ioq_churn_entry (this, entry);
  523. if (ret != 0)
  524. break;
  525. }
  526. if (list_empty (&priv->ioq)) {
  527. /* all pending writes done, not interested in POLLOUT */
  528. priv->idx = event_select_on (this->ctx->event_pool,
  529. priv->sock, priv->idx, -1, 0);
  530. }
  531. out:
  532. return ret;
  533. }
  534. int
  535. socket_event_poll_err (rpc_transport_t *this)
  536. {
  537. socket_private_t *priv = NULL;
  538. int ret = -1;
  539. GF_VALIDATE_OR_GOTO ("socket", this, out);
  540. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  541. priv = this->private;
  542. pthread_mutex_lock (&priv->lock);
  543. {
  544. __socket_ioq_flush (this);
  545. __socket_reset (this);
  546. }
  547. pthread_mutex_unlock (&priv->lock);
  548. rpc_transport_notify (this, RPC_TRANSPORT_DISCONNECT, this);
  549. out:
  550. return ret;
  551. }
  552. int
  553. socket_event_poll_out (rpc_transport_t *this)
  554. {
  555. socket_private_t *priv = NULL;
  556. int ret = -1;
  557. GF_VALIDATE_OR_GOTO ("socket", this, out);
  558. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  559. priv = this->private;
  560. pthread_mutex_lock (&priv->lock);
  561. {
  562. if (priv->connected == 1) {
  563. ret = __socket_ioq_churn (this);
  564. if (ret == -1) {
  565. __socket_disconnect (this);
  566. }
  567. }
  568. }
  569. pthread_mutex_unlock (&priv->lock);
  570. ret = rpc_transport_notify (this, RPC_TRANSPORT_MSG_SENT, NULL);
  571. out:
  572. return ret;
  573. }
  574. inline int
  575. __socket_read_simple_msg (rpc_transport_t *this)
  576. {
  577. socket_private_t *priv = NULL;
  578. int ret = 0;
  579. uint32_t remaining_size = 0;
  580. size_t bytes_read = 0;
  581. GF_VALIDATE_OR_GOTO ("socket", this, out);
  582. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  583. priv = this->private;
  584. switch (priv->incoming.frag.simple_state) {
  585. case SP_STATE_SIMPLE_MSG_INIT:
  586. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  587. - priv->incoming.frag.bytes_read;
  588. __socket_proto_init_pending (priv, remaining_size);
  589. priv->incoming.frag.simple_state =
  590. SP_STATE_READING_SIMPLE_MSG;
  591. /* fall through */
  592. case SP_STATE_READING_SIMPLE_MSG:
  593. ret = 0;
  594. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  595. - priv->incoming.frag.bytes_read;
  596. if (remaining_size > 0) {
  597. ret = __socket_readv (this,
  598. priv->incoming.pending_vector, 1,
  599. &priv->incoming.pending_vector,
  600. &priv->incoming.pending_count,
  601. &bytes_read);
  602. }
  603. if (ret == -1) {
  604. gf_log (this->name, GF_LOG_WARNING,
  605. "reading from socket failed. Error (%s), "
  606. "peer (%s)", strerror (errno),
  607. this->peerinfo.identifier);
  608. break;
  609. }
  610. priv->incoming.frag.bytes_read += bytes_read;
  611. priv->incoming.frag.fragcurrent += bytes_read;
  612. if (ret > 0) {
  613. gf_log (this->name, GF_LOG_TRACE,
  614. "partial read on non-blocking socket.");
  615. break;
  616. }
  617. if (ret == 0) {
  618. priv->incoming.frag.simple_state
  619. = SP_STATE_SIMPLE_MSG_INIT;
  620. }
  621. }
  622. out:
  623. return ret;
  624. }
  625. inline int
  626. __socket_read_simple_request (rpc_transport_t *this)
  627. {
  628. return __socket_read_simple_msg (this);
  629. }
  630. #define rpc_cred_addr(buf) (buf + RPC_MSGTYPE_SIZE + RPC_CALL_BODY_SIZE - 4)
  631. #define rpc_verf_addr(fragcurrent) (fragcurrent - 4)
  632. #define rpc_msgtype_addr(buf) (buf + 4)
  633. #define rpc_prognum_addr(buf) (buf + RPC_MSGTYPE_SIZE + 4)
  634. #define rpc_progver_addr(buf) (buf + RPC_MSGTYPE_SIZE + 8)
  635. #define rpc_procnum_addr(buf) (buf + RPC_MSGTYPE_SIZE + 12)
  636. inline int
  637. __socket_read_vectored_request (rpc_transport_t *this, rpcsvc_vector_sizer vector_sizer)
  638. {
  639. socket_private_t *priv = NULL;
  640. int ret = 0;
  641. uint32_t credlen = 0, verflen = 0;
  642. char *addr = NULL;
  643. struct iobuf *iobuf = NULL;
  644. uint32_t remaining_size = 0;
  645. ssize_t readsize = 0;
  646. size_t size = 0;
  647. GF_VALIDATE_OR_GOTO ("socket", this, out);
  648. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  649. priv = this->private;
  650. switch (priv->incoming.frag.call_body.request.vector_state) {
  651. case SP_STATE_VECTORED_REQUEST_INIT:
  652. priv->incoming.frag.call_body.request.vector_sizer_state = 0;
  653. addr = rpc_cred_addr (iobuf_ptr (priv->incoming.iobuf));
  654. /* also read verf flavour and verflen */
  655. credlen = ntoh32 (*((uint32_t *)addr))
  656. + RPC_AUTH_FLAVOUR_N_LENGTH_SIZE;
  657. __socket_proto_init_pending (priv, credlen);
  658. priv->incoming.frag.call_body.request.vector_state =
  659. SP_STATE_READING_CREDBYTES;
  660. /* fall through */
  661. case SP_STATE_READING_CREDBYTES:
  662. __socket_proto_read (priv, ret);
  663. priv->incoming.frag.call_body.request.vector_state =
  664. SP_STATE_READ_CREDBYTES;
  665. /* fall through */
  666. case SP_STATE_READ_CREDBYTES:
  667. addr = rpc_verf_addr (priv->incoming.frag.fragcurrent);
  668. verflen = ntoh32 (*((uint32_t *)addr));
  669. if (verflen == 0) {
  670. priv->incoming.frag.call_body.request.vector_state
  671. = SP_STATE_READ_VERFBYTES;
  672. goto sp_state_read_verfbytes;
  673. }
  674. __socket_proto_init_pending (priv, verflen);
  675. priv->incoming.frag.call_body.request.vector_state
  676. = SP_STATE_READING_VERFBYTES;
  677. /* fall through */
  678. case SP_STATE_READING_VERFBYTES:
  679. __socket_proto_read (priv, ret);
  680. priv->incoming.frag.call_body.request.vector_state =
  681. SP_STATE_READ_VERFBYTES;
  682. /* fall through */
  683. case SP_STATE_READ_VERFBYTES:
  684. sp_state_read_verfbytes:
  685. priv->incoming.frag.call_body.request.vector_sizer_state =
  686. vector_sizer (priv->incoming.frag.call_body.request.vector_sizer_state,
  687. &readsize,
  688. priv->incoming.frag.fragcurrent);
  689. __socket_proto_init_pending (priv, readsize);
  690. priv->incoming.frag.call_body.request.vector_state
  691. = SP_STATE_READING_PROGHDR;
  692. /* fall through */
  693. case SP_STATE_READING_PROGHDR:
  694. __socket_proto_read (priv, ret);
  695. sp_state_reading_proghdr:
  696. priv->incoming.frag.call_body.request.vector_sizer_state =
  697. vector_sizer (priv->incoming.frag.call_body.request.vector_sizer_state,
  698. &readsize,
  699. priv->incoming.frag.fragcurrent);
  700. if (readsize == 0) {
  701. priv->incoming.frag.call_body.request.vector_state =
  702. SP_STATE_READ_PROGHDR;
  703. } else {
  704. __socket_proto_init_pending (priv, readsize);
  705. __socket_proto_read (priv, ret);
  706. goto sp_state_reading_proghdr;
  707. }
  708. case SP_STATE_READ_PROGHDR:
  709. if (priv->incoming.payload_vector.iov_base == NULL) {
  710. size = RPC_FRAGSIZE (priv->incoming.fraghdr) -
  711. priv->incoming.frag.bytes_read;
  712. iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
  713. if (!iobuf) {
  714. ret = -1;
  715. break;
  716. }
  717. if (priv->incoming.iobref == NULL) {
  718. priv->incoming.iobref = iobref_new ();
  719. if (priv->incoming.iobref == NULL) {
  720. ret = -1;
  721. iobuf_unref (iobuf);
  722. break;
  723. }
  724. }
  725. iobref_add (priv->incoming.iobref, iobuf);
  726. iobuf_unref (iobuf);
  727. priv->incoming.payload_vector.iov_base
  728. = iobuf_ptr (iobuf);
  729. priv->incoming.frag.fragcurrent = iobuf_ptr (iobuf);
  730. }
  731. priv->incoming.frag.call_body.request.vector_state =
  732. SP_STATE_READING_PROG;
  733. /* fall through */
  734. case SP_STATE_READING_PROG:
  735. /* now read the remaining rpc msg into buffer pointed by
  736. * fragcurrent
  737. */
  738. ret = __socket_read_simple_msg (this);
  739. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  740. - priv->incoming.frag.bytes_read;
  741. if ((ret == -1)
  742. || ((ret == 0)
  743. && (remaining_size == 0)
  744. && RPC_LASTFRAG (priv->incoming.fraghdr))) {
  745. priv->incoming.frag.call_body.request.vector_state
  746. = SP_STATE_VECTORED_REQUEST_INIT;
  747. priv->incoming.payload_vector.iov_len
  748. = (unsigned long)priv->incoming.frag.fragcurrent
  749. - (unsigned long)
  750. priv->incoming.payload_vector.iov_base;
  751. }
  752. break;
  753. }
  754. out:
  755. return ret;
  756. }
  757. inline int
  758. __socket_read_request (rpc_transport_t *this)
  759. {
  760. socket_private_t *priv = NULL;
  761. uint32_t prognum = 0, procnum = 0, progver = 0;
  762. uint32_t remaining_size = 0;
  763. int ret = -1;
  764. char *buf = NULL;
  765. rpcsvc_vector_sizer vector_sizer = NULL;
  766. GF_VALIDATE_OR_GOTO ("socket", this, out);
  767. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  768. priv = this->private;
  769. switch (priv->incoming.frag.call_body.request.header_state) {
  770. case SP_STATE_REQUEST_HEADER_INIT:
  771. __socket_proto_init_pending (priv, RPC_CALL_BODY_SIZE);
  772. priv->incoming.frag.call_body.request.header_state
  773. = SP_STATE_READING_RPCHDR1;
  774. /* fall through */
  775. case SP_STATE_READING_RPCHDR1:
  776. __socket_proto_read (priv, ret);
  777. priv->incoming.frag.call_body.request.header_state =
  778. SP_STATE_READ_RPCHDR1;
  779. /* fall through */
  780. case SP_STATE_READ_RPCHDR1:
  781. buf = rpc_prognum_addr (iobuf_ptr (priv->incoming.iobuf));
  782. prognum = ntoh32 (*((uint32_t *)buf));
  783. buf = rpc_progver_addr (iobuf_ptr (priv->incoming.iobuf));
  784. progver = ntoh32 (*((uint32_t *)buf));
  785. buf = rpc_procnum_addr (iobuf_ptr (priv->incoming.iobuf));
  786. procnum = ntoh32 (*((uint32_t *)buf));
  787. if (this->listener) {
  788. /* this check is needed as rpcsvc and rpc-clnt actor structures are
  789. * not same */
  790. vector_sizer = rpcsvc_get_program_vector_sizer ((rpcsvc_t *)this->mydata,
  791. prognum, progver, procnum);
  792. }
  793. if (vector_sizer) {
  794. ret = __socket_read_vectored_request (this, vector_sizer);
  795. } else {
  796. ret = __socket_read_simple_request (this);
  797. }
  798. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  799. - priv->incoming.frag.bytes_read;
  800. if ((ret == -1)
  801. || ((ret == 0)
  802. && (remaining_size == 0)
  803. && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
  804. priv->incoming.frag.call_body.request.header_state =
  805. SP_STATE_REQUEST_HEADER_INIT;
  806. }
  807. break;
  808. }
  809. out:
  810. return ret;
  811. }
  812. inline int
  813. __socket_read_accepted_successful_reply (rpc_transport_t *this)
  814. {
  815. socket_private_t *priv = NULL;
  816. int ret = 0;
  817. struct iobuf *iobuf = NULL;
  818. uint32_t gluster_read_rsp_hdr_len = 0;
  819. gfs3_read_rsp read_rsp = {0, };
  820. size_t size = 0;
  821. GF_VALIDATE_OR_GOTO ("socket", this, out);
  822. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  823. priv = this->private;
  824. switch (priv->incoming.frag.call_body.reply.accepted_success_state) {
  825. case SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT:
  826. gluster_read_rsp_hdr_len = xdr_sizeof ((xdrproc_t) xdr_gfs3_read_rsp,
  827. &read_rsp);
  828. if (gluster_read_rsp_hdr_len == 0) {
  829. gf_log (this->name, GF_LOG_ERROR,
  830. "xdr_sizeof on gfs3_read_rsp failed");
  831. ret = -1;
  832. goto out;
  833. }
  834. __socket_proto_init_pending (priv, gluster_read_rsp_hdr_len);
  835. priv->incoming.frag.call_body.reply.accepted_success_state
  836. = SP_STATE_READING_PROC_HEADER;
  837. /* fall through */
  838. case SP_STATE_READING_PROC_HEADER:
  839. __socket_proto_read (priv, ret);
  840. priv->incoming.frag.call_body.reply.accepted_success_state
  841. = SP_STATE_READ_PROC_HEADER;
  842. if (priv->incoming.payload_vector.iov_base == NULL) {
  843. size = (RPC_FRAGSIZE (priv->incoming.fraghdr) -
  844. priv->incoming.frag.bytes_read);
  845. iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
  846. if (iobuf == NULL) {
  847. ret = -1;
  848. goto out;
  849. }
  850. if (priv->incoming.iobref == NULL) {
  851. priv->incoming.iobref = iobref_new ();
  852. if (priv->incoming.iobref == NULL) {
  853. ret = -1;
  854. iobuf_unref (iobuf);
  855. goto out;
  856. }
  857. }
  858. iobref_add (priv->incoming.iobref, iobuf);
  859. iobuf_unref (iobuf);
  860. priv->incoming.payload_vector.iov_base
  861. = iobuf_ptr (iobuf);
  862. priv->incoming.payload_vector.iov_len = size;
  863. }
  864. priv->incoming.frag.fragcurrent
  865. = priv->incoming.payload_vector.iov_base;
  866. /* fall through */
  867. case SP_STATE_READ_PROC_HEADER:
  868. /* now read the entire remaining msg into new iobuf */
  869. ret = __socket_read_simple_msg (this);
  870. if ((ret == -1)
  871. || ((ret == 0)
  872. && RPC_LASTFRAG (priv->incoming.fraghdr))) {
  873. priv->incoming.frag.call_body.reply.accepted_success_state
  874. = SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT;
  875. }
  876. break;
  877. }
  878. out:
  879. return ret;
  880. }
  881. #define rpc_reply_verflen_addr(fragcurrent) ((char *)fragcurrent - 4)
  882. #define rpc_reply_accept_status_addr(fragcurrent) ((char *)fragcurrent - 4)
  883. inline int
  884. __socket_read_accepted_reply (rpc_transport_t *this)
  885. {
  886. socket_private_t *priv = NULL;
  887. int ret = -1;
  888. char *buf = NULL;
  889. uint32_t verflen = 0, len = 0;
  890. uint32_t remaining_size = 0;
  891. GF_VALIDATE_OR_GOTO ("socket", this, out);
  892. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  893. priv = this->private;
  894. switch (priv->incoming.frag.call_body.reply.accepted_state) {
  895. case SP_STATE_ACCEPTED_REPLY_INIT:
  896. __socket_proto_init_pending (priv,
  897. RPC_AUTH_FLAVOUR_N_LENGTH_SIZE);
  898. priv->incoming.frag.call_body.reply.accepted_state
  899. = SP_STATE_READING_REPLY_VERFLEN;
  900. /* fall through */
  901. case SP_STATE_READING_REPLY_VERFLEN:
  902. __socket_proto_read (priv, ret);
  903. priv->incoming.frag.call_body.reply.accepted_state
  904. = SP_STATE_READ_REPLY_VERFLEN;
  905. /* fall through */
  906. case SP_STATE_READ_REPLY_VERFLEN:
  907. buf = rpc_reply_verflen_addr (priv->incoming.frag.fragcurrent);
  908. verflen = ntoh32 (*((uint32_t *) buf));
  909. /* also read accept status along with verf data */
  910. len = verflen + RPC_ACCEPT_STATUS_LEN;
  911. __socket_proto_init_pending (priv, len);
  912. priv->incoming.frag.call_body.reply.accepted_state
  913. = SP_STATE_READING_REPLY_VERFBYTES;
  914. /* fall through */
  915. case SP_STATE_READING_REPLY_VERFBYTES:
  916. __socket_proto_read (priv, ret);
  917. priv->incoming.frag.call_body.reply.accepted_state
  918. = SP_STATE_READ_REPLY_VERFBYTES;
  919. buf = rpc_reply_accept_status_addr (priv->incoming.frag.fragcurrent);
  920. priv->incoming.frag.call_body.reply.accept_status
  921. = ntoh32 (*(uint32_t *) buf);
  922. /* fall through */
  923. case SP_STATE_READ_REPLY_VERFBYTES:
  924. if (priv->incoming.frag.call_body.reply.accept_status
  925. == SUCCESS) {
  926. ret = __socket_read_accepted_successful_reply (this);
  927. } else {
  928. /* read entire remaining msg into buffer pointed to by
  929. * fragcurrent
  930. */
  931. ret = __socket_read_simple_msg (this);
  932. }
  933. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  934. - priv->incoming.frag.bytes_read;
  935. if ((ret == -1)
  936. || ((ret == 0)
  937. && (remaining_size == 0)
  938. && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
  939. priv->incoming.frag.call_body.reply.accepted_state
  940. = SP_STATE_ACCEPTED_REPLY_INIT;
  941. }
  942. break;
  943. }
  944. out:
  945. return ret;
  946. }
  947. inline int
  948. __socket_read_denied_reply (rpc_transport_t *this)
  949. {
  950. return __socket_read_simple_msg (this);
  951. }
  952. #define rpc_reply_status_addr(fragcurrent) ((char *)fragcurrent - 4)
  953. inline int
  954. __socket_read_vectored_reply (rpc_transport_t *this)
  955. {
  956. socket_private_t *priv = NULL;
  957. int ret = 0;
  958. char *buf = NULL;
  959. uint32_t remaining_size = 0;
  960. GF_VALIDATE_OR_GOTO ("socket", this, out);
  961. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  962. priv = this->private;
  963. switch (priv->incoming.frag.call_body.reply.status_state) {
  964. case SP_STATE_ACCEPTED_REPLY_INIT:
  965. __socket_proto_init_pending (priv, RPC_REPLY_STATUS_SIZE);
  966. priv->incoming.frag.call_body.reply.status_state
  967. = SP_STATE_READING_REPLY_STATUS;
  968. /* fall through */
  969. case SP_STATE_READING_REPLY_STATUS:
  970. __socket_proto_read (priv, ret);
  971. buf = rpc_reply_status_addr (priv->incoming.frag.fragcurrent);
  972. priv->incoming.frag.call_body.reply.accept_status
  973. = ntoh32 (*((uint32_t *) buf));
  974. priv->incoming.frag.call_body.reply.status_state
  975. = SP_STATE_READ_REPLY_STATUS;
  976. /* fall through */
  977. case SP_STATE_READ_REPLY_STATUS:
  978. if (priv->incoming.frag.call_body.reply.accept_status
  979. == MSG_ACCEPTED) {
  980. ret = __socket_read_accepted_reply (this);
  981. } else {
  982. ret = __socket_read_denied_reply (this);
  983. }
  984. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  985. - priv->incoming.frag.bytes_read;
  986. if ((ret == -1)
  987. || ((ret == 0)
  988. && (remaining_size == 0)
  989. && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
  990. priv->incoming.frag.call_body.reply.status_state
  991. = SP_STATE_ACCEPTED_REPLY_INIT;
  992. priv->incoming.payload_vector.iov_len
  993. = (unsigned long)priv->incoming.frag.fragcurrent
  994. - (unsigned long)
  995. priv->incoming.payload_vector.iov_base;
  996. }
  997. break;
  998. }
  999. out:
  1000. return ret;
  1001. }
  1002. inline int
  1003. __socket_read_simple_reply (rpc_transport_t *this)
  1004. {
  1005. return __socket_read_simple_msg (this);
  1006. }
  1007. #define rpc_xid_addr(buf) (buf)
  1008. inline int
  1009. __socket_read_reply (rpc_transport_t *this)
  1010. {
  1011. socket_private_t *priv = NULL;
  1012. char *buf = NULL;
  1013. int32_t ret = -1;
  1014. rpc_request_info_t *request_info = NULL;
  1015. char map_xid = 0;
  1016. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1017. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1018. priv = this->private;
  1019. buf = rpc_xid_addr (iobuf_ptr (priv->incoming.iobuf));
  1020. if (priv->incoming.request_info == NULL) {
  1021. priv->incoming.request_info = GF_CALLOC (1,
  1022. sizeof (*request_info),
  1023. gf_common_mt_rpc_trans_reqinfo_t);
  1024. if (priv->incoming.request_info == NULL) {
  1025. goto out;
  1026. }
  1027. map_xid = 1;
  1028. }
  1029. request_info = priv->incoming.request_info;
  1030. if (map_xid) {
  1031. request_info->xid = ntoh32 (*((uint32_t *) buf));
  1032. /* release priv->lock, so as to avoid deadlock b/w conn->lock
  1033. * and priv->lock, since we are doing an upcall here.
  1034. */
  1035. pthread_mutex_unlock (&priv->lock);
  1036. {
  1037. ret = rpc_transport_notify (this,
  1038. RPC_TRANSPORT_MAP_XID_REQUEST,
  1039. priv->incoming.request_info);
  1040. }
  1041. pthread_mutex_lock (&priv->lock);
  1042. if (ret == -1) {
  1043. gf_log (this->name, GF_LOG_WARNING,
  1044. "notify for event MAP_XID failed");
  1045. goto out;
  1046. }
  1047. }
  1048. if ((request_info->prognum == GLUSTER3_1_FOP_PROGRAM)
  1049. && (request_info->procnum == GF_FOP_READ)) {
  1050. if (map_xid && request_info->rsp.rsp_payload_count != 0) {
  1051. priv->incoming.iobref
  1052. = iobref_ref (request_info->rsp.rsp_iobref);
  1053. priv->incoming.payload_vector
  1054. = *request_info->rsp.rsp_payload;
  1055. }
  1056. ret = __socket_read_vectored_reply (this);
  1057. } else {
  1058. ret = __socket_read_simple_reply (this);
  1059. }
  1060. out:
  1061. return ret;
  1062. }
  1063. /* returns the number of bytes yet to be read in a fragment */
  1064. inline int
  1065. __socket_read_frag (rpc_transport_t *this)
  1066. {
  1067. socket_private_t *priv = NULL;
  1068. int32_t ret = 0;
  1069. char *buf = NULL;
  1070. uint32_t remaining_size = 0;
  1071. GF_VALIDATE_OR_GOTO ("socket", this, out);
  1072. GF_VALIDATE_OR_GOTO ("socket", this->private, out);
  1073. priv = this->private;
  1074. switch (priv->incoming.frag.state) {
  1075. case SP_STATE_NADA:
  1076. __socket_proto_init_pending (priv, RPC_MSGTYPE_SIZE);
  1077. priv->incoming.frag.state = SP_STATE_READING_MSGTYPE;
  1078. /* fall through */
  1079. case SP_STATE_READING_MSGTYPE:
  1080. __socket_proto_read (priv, ret);
  1081. priv->incoming.frag.state = SP_STATE_READ_MSGTYPE;
  1082. /* fall through */
  1083. case SP_STATE_READ_MSGTYPE:
  1084. buf = rpc_msgtype_addr (iobuf_ptr (priv->incoming.iobuf));
  1085. priv->incoming.msg_type = ntoh32 (*((uint32_t *)buf));
  1086. if (priv->incoming.msg_type == CALL) {
  1087. ret = __socket_read_request (this);
  1088. } else if (priv->incoming.msg_type == REPLY) {
  1089. ret = __socket_read_reply (this);
  1090. } else if (priv->incoming.msg_type == GF_UNIVERSAL_ANSWER) {
  1091. gf_log ("rpc", GF_LOG_ERROR,
  1092. "older version of protocol/process trying to "
  1093. "connect from %s. use newer version on that node",
  1094. this->peerinfo.identifier);
  1095. } else {
  1096. gf_log ("rpc", GF_LOG_ERROR,
  1097. "wrong MSG-TYPE (%d) received from %s",
  1098. priv->incoming.msg_type,
  1099. this->peerinfo.identifier);
  1100. ret = -1;
  1101. }
  1102. remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
  1103. - priv->incoming.frag.bytes_read;
  1104. if ((ret == -1)
  1105. || ((ret == 0)
  1106. && (remaining_size == 0)
  1107. && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
  1108. priv->incoming.frag.state = SP_STATE_NADA;
  1109. }
  1110. break;
  1111. }
  1112. out:
  1113. return ret;
  1114. }
  1115. inline
  1116. void __socket_reset_priv (socket_private_t *priv)
  1117. {
  1118. if (priv->incoming.iobref) {
  1119. iobref_unref (priv->incoming.iobref);
  1120. priv->incoming.iobref = NULL;
  1121. }
  1122. if (priv->incoming.iobuf) {
  1123. iobuf_unref (priv->incoming.iobuf);
  1124. }
  1125. if (priv->incoming.request_info != NULL) {
  1126. GF_FREE (priv->incoming.request_info);
  1127. priv->incoming.request_info = NULL;
  1128. }
  1129. memset (&priv->incoming.payload_vector, 0,
  1130. sizeof (priv->incoming.payload_vector));
  1131. priv->incoming.iobuf = NULL;
  1132. }
  1133. int
  1134. __socket_proto_state_machine (rpc_transport_t *this,

Large files are truncated, but you can click here to view the full file