/glusterfs-3.3.0/xlators/mgmt/glusterd/src/glusterd-brick-ops.c

# · C · 1646 lines · 1332 code · 217 blank · 97 comment · 250 complexity · d1080a0cb0f67d30821fff363a4e8833 MD5 · raw file

Large files are truncated; click here to view the full file.

  1. /*
  2. Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
  3. This file is part of GlusterFS.
  4. GlusterFS is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published
  6. by the Free Software Foundation; either version 3 of the License,
  7. or (at your option) any later version.
  8. GlusterFS is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program. If not, see
  14. <http://www.gnu.org/licenses/>.
  15. */
  16. #ifndef _CONFIG_H
  17. #define _CONFIG_H
  18. #include "config.h"
  19. #endif
  20. #include "common-utils.h"
  21. #include "cli1-xdr.h"
  22. #include "xdr-generic.h"
  23. #include "glusterd.h"
  24. #include "glusterd-op-sm.h"
  25. #include "glusterd-store.h"
  26. #include "glusterd-utils.h"
  27. #include "glusterd-volgen.h"
  28. #include "run.h"
  29. #include <sys/signal.h>
  30. /* misc */
  31. /* In this function, we decide, based on the 'count' of the brick,
  32. where to add it in the current volume. 'count' tells us already
  33. how many of the given bricks are added. other argument are self-
  34. descriptive. */
  35. int
  36. add_brick_at_right_order (glusterd_brickinfo_t *brickinfo,
  37. glusterd_volinfo_t *volinfo, int count,
  38. int32_t stripe_cnt, int32_t replica_cnt)
  39. {
  40. int idx = 0;
  41. int i = 0;
  42. int sub_cnt = 0;
  43. glusterd_brickinfo_t *brick = NULL;
  44. /* The complexity of the function is in deciding at which index
  45. to add new brick. Even though it can be defined with a complex
  46. single formula for all volume, it is seperated out to make it
  47. more readable */
  48. if (stripe_cnt) {
  49. /* common formula when 'stripe_count' is set */
  50. /* idx = ((count / ((stripe_cnt * volinfo->replica_count) -
  51. volinfo->dist_leaf_count)) * volinfo->dist_leaf_count) +
  52. (count + volinfo->dist_leaf_count);
  53. */
  54. sub_cnt = volinfo->dist_leaf_count;
  55. idx = ((count / ((stripe_cnt * volinfo->replica_count) -
  56. sub_cnt)) * sub_cnt) +
  57. (count + sub_cnt);
  58. goto insert_brick;
  59. }
  60. /* replica count is set */
  61. /* common formula when 'replica_count' is set */
  62. /* idx = ((count / (replica_cnt - existing_replica_count)) *
  63. existing_replica_count) +
  64. (count + existing_replica_count);
  65. */
  66. sub_cnt = volinfo->replica_count;
  67. idx = (count / (replica_cnt - sub_cnt) * sub_cnt) +
  68. (count + sub_cnt);
  69. insert_brick:
  70. i = 0;
  71. list_for_each_entry (brick, &volinfo->bricks, brick_list) {
  72. i++;
  73. if (i < idx)
  74. continue;
  75. gf_log (THIS->name, GF_LOG_DEBUG, "brick:%s index=%d, count=%d",
  76. brick->path, idx, count);
  77. list_add (&brickinfo->brick_list, &brick->brick_list);
  78. break;
  79. }
  80. return 0;
  81. }
  82. static int
  83. gd_addbr_validate_stripe_count (glusterd_volinfo_t *volinfo, int stripe_count,
  84. int total_bricks, int *type, char *err_str,
  85. size_t err_len)
  86. {
  87. int ret = -1;
  88. switch (volinfo->type) {
  89. case GF_CLUSTER_TYPE_NONE:
  90. if ((volinfo->brick_count * stripe_count) == total_bricks) {
  91. /* Change the volume type */
  92. *type = GF_CLUSTER_TYPE_STRIPE;
  93. gf_log (THIS->name, GF_LOG_INFO,
  94. "Changing the type of volume %s from "
  95. "'distribute' to 'stripe'", volinfo->volname);
  96. ret = 0;
  97. goto out;
  98. } else {
  99. snprintf (err_str, err_len, "Incorrect number of "
  100. "bricks (%d) supplied for stripe count (%d).",
  101. (total_bricks - volinfo->brick_count),
  102. stripe_count);
  103. gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
  104. goto out;
  105. }
  106. break;
  107. case GF_CLUSTER_TYPE_REPLICATE:
  108. if (!(total_bricks % (volinfo->replica_count * stripe_count))) {
  109. /* Change the volume type */
  110. *type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
  111. gf_log (THIS->name, GF_LOG_INFO,
  112. "Changing the type of volume %s from "
  113. "'replicate' to 'replicate-stripe'",
  114. volinfo->volname);
  115. ret = 0;
  116. goto out;
  117. } else {
  118. snprintf (err_str, err_len, "Incorrect number of "
  119. "bricks (%d) supplied for changing volume's "
  120. "stripe count to %d, need at least %d bricks",
  121. (total_bricks - volinfo->brick_count),
  122. stripe_count,
  123. (volinfo->replica_count * stripe_count));
  124. gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
  125. goto out;
  126. }
  127. break;
  128. case GF_CLUSTER_TYPE_STRIPE:
  129. case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
  130. if (stripe_count < volinfo->stripe_count) {
  131. snprintf (err_str, err_len,
  132. "Incorrect stripe count (%d) supplied. "
  133. "Volume already has stripe count (%d)",
  134. stripe_count, volinfo->stripe_count);
  135. gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
  136. goto out;
  137. }
  138. if (stripe_count == volinfo->stripe_count) {
  139. if (!(total_bricks % volinfo->dist_leaf_count)) {
  140. /* its same as the one which exists */
  141. ret = 1;
  142. goto out;
  143. }
  144. }
  145. if (stripe_count > volinfo->stripe_count) {
  146. /* We have to make sure before and after 'add-brick',
  147. the number or subvolumes for distribute will remain
  148. same, when stripe count is given */
  149. if ((volinfo->brick_count * (stripe_count *
  150. volinfo->replica_count)) ==
  151. (total_bricks * volinfo->dist_leaf_count)) {
  152. /* Change the dist_leaf_count */
  153. gf_log (THIS->name, GF_LOG_INFO,
  154. "Changing the stripe count of "
  155. "volume %s from %d to %d",
  156. volinfo->volname,
  157. volinfo->stripe_count, stripe_count);
  158. ret = 0;
  159. goto out;
  160. }
  161. }
  162. break;
  163. }
  164. out:
  165. return ret;
  166. }
  167. static int
  168. gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,
  169. int total_bricks, int *type, char *err_str,
  170. int err_len)
  171. {
  172. int ret = -1;
  173. /* replica count is set */
  174. switch (volinfo->type) {
  175. case GF_CLUSTER_TYPE_NONE:
  176. if ((volinfo->brick_count * replica_count) == total_bricks) {
  177. /* Change the volume type */
  178. *type = GF_CLUSTER_TYPE_REPLICATE;
  179. gf_log (THIS->name, GF_LOG_INFO,
  180. "Changing the type of volume %s from "
  181. "'distribute' to 'replica'", volinfo->volname);
  182. ret = 0;
  183. goto out;
  184. } else {
  185. snprintf (err_str, err_len, "Incorrect number of "
  186. "bricks (%d) supplied for replica count (%d).",
  187. (total_bricks - volinfo->brick_count),
  188. replica_count);
  189. gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
  190. goto out;
  191. }
  192. break;
  193. case GF_CLUSTER_TYPE_STRIPE:
  194. if (!(total_bricks % (volinfo->dist_leaf_count * replica_count))) {
  195. /* Change the volume type */
  196. *type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
  197. gf_log (THIS->name, GF_LOG_INFO,
  198. "Changing the type of volume %s from "
  199. "'stripe' to 'replicate-stripe'",
  200. volinfo->volname);
  201. ret = 0;
  202. goto out;
  203. } else {
  204. snprintf (err_str, err_len, "Incorrect number of "
  205. "bricks (%d) supplied for changing volume's "
  206. "replica count to %d, need at least %d "
  207. "bricks",
  208. (total_bricks - volinfo->brick_count),
  209. replica_count, (volinfo->dist_leaf_count *
  210. replica_count));
  211. gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
  212. goto out;
  213. }
  214. break;
  215. case GF_CLUSTER_TYPE_REPLICATE:
  216. case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
  217. if (replica_count < volinfo->replica_count) {
  218. snprintf (err_str, err_len,
  219. "Incorrect replica count (%d) supplied. "
  220. "Volume already has (%d)",
  221. replica_count, volinfo->replica_count);
  222. gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
  223. goto out;
  224. }
  225. if (replica_count == volinfo->replica_count) {
  226. if (!(total_bricks % volinfo->dist_leaf_count)) {
  227. ret = 1;
  228. goto out;
  229. }
  230. }
  231. if (replica_count > volinfo->replica_count) {
  232. /* We have to make sure before and after 'add-brick',
  233. the number or subvolumes for distribute will remain
  234. same, when replica count is given */
  235. if ((total_bricks * volinfo->dist_leaf_count) ==
  236. (volinfo->brick_count * (replica_count *
  237. volinfo->stripe_count))) {
  238. /* Change the dist_leaf_count */
  239. gf_log (THIS->name, GF_LOG_INFO,
  240. "Changing the replica count of "
  241. "volume %s from %d to %d",
  242. volinfo->volname, volinfo->replica_count,
  243. replica_count);
  244. ret = 0;
  245. goto out;
  246. }
  247. }
  248. break;
  249. }
  250. out:
  251. return ret;
  252. }
  253. static int
  254. gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,
  255. int32_t replica_count,
  256. int32_t brick_count, char *err_str,
  257. size_t err_len)
  258. {
  259. int ret = -1;
  260. int replica_nodes = 0;
  261. switch (volinfo->type) {
  262. case GF_CLUSTER_TYPE_NONE:
  263. case GF_CLUSTER_TYPE_STRIPE:
  264. snprintf (err_str, err_len,
  265. "replica count (%d) option given for non replicate "
  266. "volume %s", replica_count, volinfo->volname);
  267. gf_log (THIS->name, GF_LOG_WARNING, "%s", err_str);
  268. goto out;
  269. case GF_CLUSTER_TYPE_REPLICATE:
  270. case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
  271. /* in remove brick, you can only reduce the replica count */
  272. if (replica_count > volinfo->replica_count) {
  273. snprintf (err_str, err_len,
  274. "given replica count (%d) option is more "
  275. "than volume %s's replica count (%d)",
  276. replica_count, volinfo->volname,
  277. volinfo->replica_count);
  278. gf_log (THIS->name, GF_LOG_WARNING, "%s", err_str);
  279. goto out;
  280. }
  281. if (replica_count == volinfo->replica_count) {
  282. /* This means the 'replica N' option on CLI was
  283. redundant. Check if the total number of bricks given
  284. for removal is same as 'dist_leaf_count' */
  285. if (brick_count % volinfo->dist_leaf_count) {
  286. snprintf (err_str, err_len,
  287. "number of bricks provided (%d) is "
  288. "not valid. need at least %d "
  289. "(or %dxN)", brick_count,
  290. volinfo->dist_leaf_count,
  291. volinfo->dist_leaf_count);
  292. gf_log (THIS->name, GF_LOG_WARNING, "%s",
  293. err_str);
  294. goto out;
  295. }
  296. ret = 1;
  297. goto out;
  298. }
  299. replica_nodes = ((volinfo->brick_count /
  300. volinfo->replica_count) *
  301. (volinfo->replica_count - replica_count));
  302. if (brick_count % replica_nodes) {
  303. snprintf (err_str, err_len,
  304. "need %d(xN) bricks for reducing replica "
  305. "count of the volume from %d to %d",
  306. replica_nodes, volinfo->replica_count,
  307. replica_count);
  308. goto out;
  309. }
  310. break;
  311. }
  312. ret = 0;
  313. out:
  314. return ret;
  315. }
  316. /* Handler functions */
  317. int
  318. glusterd_handle_add_brick (rpcsvc_request_t *req)
  319. {
  320. int32_t ret = -1;
  321. gf_cli_req cli_req = {{0,}};
  322. dict_t *dict = NULL;
  323. char *bricks = NULL;
  324. char *volname = NULL;
  325. int brick_count = 0;
  326. char *brick_list = NULL;
  327. void *cli_rsp = NULL;
  328. char err_str[2048] = {0,};
  329. gf_cli_rsp rsp = {0,};
  330. glusterd_volinfo_t *volinfo = NULL;
  331. xlator_t *this = NULL;
  332. int total_bricks = 0;
  333. int32_t replica_count = 0;
  334. int32_t stripe_count = 0;
  335. int type = 0;
  336. this = THIS;
  337. GF_ASSERT(this);
  338. GF_ASSERT (req);
  339. if (!xdr_to_generic (req->msg[0], &cli_req,
  340. (xdrproc_t)xdr_gf_cli_req)) {
  341. //failed to decode msg;
  342. req->rpc_err = GARBAGE_ARGS;
  343. snprintf (err_str, sizeof (err_str), "Garbage args received");
  344. goto out;
  345. }
  346. gf_log ("glusterd", GF_LOG_INFO, "Received add brick req");
  347. if (cli_req.dict.dict_len) {
  348. /* Unserialize the dictionary */
  349. dict = dict_new ();
  350. ret = dict_unserialize (cli_req.dict.dict_val,
  351. cli_req.dict.dict_len,
  352. &dict);
  353. if (ret < 0) {
  354. gf_log ("glusterd", GF_LOG_ERROR,
  355. "failed to "
  356. "unserialize req-buffer to dictionary");
  357. snprintf (err_str, sizeof (err_str), "Unable to decode "
  358. "the buffer");
  359. goto out;
  360. }
  361. }
  362. ret = dict_get_str (dict, "volname", &volname);
  363. gf_cmd_log ("Volume add-brick", "on volname: %s attempted",
  364. volname);
  365. if (ret) {
  366. gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
  367. snprintf (err_str, sizeof (err_str), "Unable to get volume "
  368. "name");
  369. goto out;
  370. }
  371. if (!(ret = glusterd_check_volume_exists (volname))) {
  372. ret = -1;
  373. snprintf(err_str, 2048, "Volume %s does not exist", volname);
  374. gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
  375. goto out;
  376. }
  377. ret = dict_get_int32 (dict, "count", &brick_count);
  378. if (ret) {
  379. gf_log ("", GF_LOG_ERROR, "Unable to get count");
  380. snprintf (err_str, sizeof (err_str), "Unable to get volume "
  381. "brick count");
  382. goto out;
  383. }
  384. ret = dict_get_int32 (dict, "replica-count", &replica_count);
  385. if (!ret) {
  386. gf_log (THIS->name, GF_LOG_INFO, "replica-count is %d",
  387. replica_count);
  388. }
  389. ret = dict_get_int32 (dict, "stripe-count", &stripe_count);
  390. if (!ret) {
  391. gf_log (THIS->name, GF_LOG_INFO, "stripe-count is %d",
  392. stripe_count);
  393. }
  394. ret = glusterd_volinfo_find (volname, &volinfo);
  395. if (ret) {
  396. snprintf (err_str, sizeof (err_str), "Unable to get volinfo "
  397. "for volume name %s", volname);
  398. gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
  399. goto out;
  400. }
  401. total_bricks = volinfo->brick_count + brick_count;
  402. if (!stripe_count && !replica_count) {
  403. if (volinfo->type == GF_CLUSTER_TYPE_NONE)
  404. goto brick_val;
  405. if ((volinfo->brick_count < volinfo->dist_leaf_count) &&
  406. (total_bricks <= volinfo->dist_leaf_count))
  407. goto brick_val;
  408. if ((brick_count % volinfo->dist_leaf_count) != 0) {
  409. snprintf(err_str, 2048, "Incorrect number of bricks"
  410. " supplied %d with count %d",
  411. brick_count, volinfo->dist_leaf_count);
  412. gf_log("glusterd", GF_LOG_ERROR, "%s", err_str);
  413. ret = -1;
  414. goto out;
  415. }
  416. goto brick_val;
  417. /* done with validation.. below section is if stripe|replica
  418. count is given */
  419. }
  420. /* These bricks needs to be added one per a replica or stripe volume */
  421. if (stripe_count) {
  422. ret = gd_addbr_validate_stripe_count (volinfo, stripe_count,
  423. total_bricks, &type,
  424. err_str,
  425. sizeof (err_str));
  426. if (ret == -1) {
  427. gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
  428. goto out;
  429. }
  430. /* if stripe count is same as earlier, set it back to 0 */
  431. if (ret == 1)
  432. stripe_count = 0;
  433. ret = dict_set_int32 (dict, "stripe-count", stripe_count);
  434. if (ret) {
  435. gf_log (THIS->name, GF_LOG_ERROR,
  436. "failed to set the stripe-count in dict");
  437. goto out;
  438. }
  439. goto brick_val;
  440. }
  441. ret = gd_addbr_validate_replica_count (volinfo, replica_count,
  442. total_bricks,
  443. &type, err_str,
  444. sizeof (err_str));
  445. if (ret == -1) {
  446. gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
  447. goto out;
  448. }
  449. /* if replica count is same as earlier, set it back to 0 */
  450. if (ret == 1)
  451. replica_count = 0;
  452. ret = dict_set_int32 (dict, "replica-count", replica_count);
  453. if (ret) {
  454. gf_log (THIS->name, GF_LOG_ERROR,
  455. "failed to set the replica-count in dict");
  456. goto out;
  457. }
  458. brick_val:
  459. ret = dict_get_str (dict, "bricks", &bricks);
  460. if (ret) {
  461. snprintf (err_str, sizeof (err_str), "Unable to get volume "
  462. "bricks");
  463. gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
  464. goto out;
  465. }
  466. gf_cmd_log ("Volume add-brick", "volname: %s type %d count:%d bricks:%s"
  467. ,volname, volinfo->type, brick_count, brick_list);
  468. if (type != volinfo->type) {
  469. ret = dict_set_int32 (dict, "type", type);
  470. if (ret)
  471. gf_log (THIS->name, GF_LOG_ERROR,
  472. "failed to set the new type in dict");
  473. }
  474. ret = glusterd_op_begin (req, GD_OP_ADD_BRICK, dict);
  475. out:
  476. gf_cmd_log ("Volume add-brick","on volname: %s %s", volname,
  477. (ret != 0)? "FAILED" : "SUCCESS");
  478. if (ret) {
  479. if (dict)
  480. dict_unref (dict);
  481. rsp.op_ret = -1;
  482. rsp.op_errno = 0;
  483. if (err_str[0] == '\0')
  484. snprintf (err_str, sizeof (err_str), "Operation failed");
  485. rsp.op_errstr = err_str;
  486. cli_rsp = &rsp;
  487. glusterd_submit_reply(req, cli_rsp, NULL, 0, NULL,
  488. (xdrproc_t)xdr_gf_cli_rsp);
  489. ret = 0; //sent error to cli, prevent second reply
  490. }
  491. glusterd_friend_sm ();
  492. glusterd_op_sm ();
  493. if (cli_req.dict.dict_val)
  494. free (cli_req.dict.dict_val); //its malloced by xdr
  495. return ret;
  496. }
  497. int
  498. glusterd_handle_remove_brick (rpcsvc_request_t *req)
  499. {
  500. int32_t ret = -1;
  501. gf_cli_req cli_req = {{0,}};
  502. dict_t *dict = NULL;
  503. int32_t count = 0;
  504. char *brick = NULL;
  505. char key[256] = {0,};
  506. char *brick_list = NULL;
  507. int i = 1;
  508. glusterd_volinfo_t *volinfo = NULL;
  509. glusterd_brickinfo_t *brickinfo = NULL;
  510. int32_t pos = 0;
  511. int32_t sub_volume = 0;
  512. int32_t sub_volume_start = 0;
  513. int32_t sub_volume_end = 0;
  514. glusterd_brickinfo_t *tmp = NULL;
  515. char err_str[2048] = {0};
  516. gf_cli_rsp rsp = {0,};
  517. void *cli_rsp = NULL;
  518. char vol_type[256] = {0,};
  519. int32_t replica_count = 0;
  520. int32_t brick_index = 0;
  521. int32_t tmp_brick_idx = 0;
  522. int found = 0;
  523. int diff_count = 0;
  524. char *volname = 0;
  525. GF_ASSERT (req);
  526. if (!xdr_to_generic (req->msg[0], &cli_req,
  527. (xdrproc_t)xdr_gf_cli_req)) {
  528. //failed to decode msg;
  529. req->rpc_err = GARBAGE_ARGS;
  530. goto out;
  531. }
  532. gf_log ("glusterd", GF_LOG_INFO, "Received rem brick req");
  533. if (cli_req.dict.dict_len) {
  534. /* Unserialize the dictionary */
  535. dict = dict_new ();
  536. ret = dict_unserialize (cli_req.dict.dict_val,
  537. cli_req.dict.dict_len,
  538. &dict);
  539. if (ret < 0) {
  540. gf_log ("glusterd", GF_LOG_ERROR,
  541. "failed to "
  542. "unserialize req-buffer to dictionary");
  543. goto out;
  544. }
  545. }
  546. ret = dict_get_str (dict, "volname", &volname);
  547. if (ret) {
  548. gf_log (THIS->name, GF_LOG_ERROR, "Unable to get volname");
  549. goto out;
  550. }
  551. gf_cmd_log ("Volume remove-brick","on volname: %s attempted", volname);
  552. ret = dict_get_int32 (dict, "count", &count);
  553. if (ret) {
  554. gf_log ("", GF_LOG_ERROR, "Unable to get count");
  555. goto out;
  556. }
  557. ret = glusterd_volinfo_find (volname, &volinfo);
  558. if (ret) {
  559. snprintf (err_str, 2048, "Volume %s does not exist",
  560. volname);
  561. gf_log ("", GF_LOG_ERROR, "%s", err_str);
  562. goto out;
  563. }
  564. ret = dict_get_int32 (dict, "replica-count", &replica_count);
  565. if (!ret) {
  566. gf_log (THIS->name, GF_LOG_INFO,
  567. "request to change replica-count to %d", replica_count);
  568. ret = gd_rmbr_validate_replica_count (volinfo, replica_count,
  569. count, err_str,
  570. sizeof (err_str));
  571. if (ret < 0) {
  572. /* logging and error msg are done in above function
  573. itself */
  574. goto out;
  575. }
  576. dict_del (dict, "replica-count");
  577. if (ret) {
  578. replica_count = 0;
  579. } else {
  580. ret = dict_set_int32 (dict, "replica-count",
  581. replica_count);
  582. if (ret) {
  583. gf_log (THIS->name, GF_LOG_WARNING,
  584. "failed to set the replica_count "
  585. "in dict");
  586. goto out;
  587. }
  588. }
  589. }
  590. /* 'vol_type' is used for giving the meaning full error msg for user */
  591. if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
  592. strcpy (vol_type, "replica");
  593. } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE) {
  594. strcpy (vol_type, "stripe");
  595. } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
  596. strcpy (vol_type, "stripe-replicate");
  597. } else {
  598. strcpy (vol_type, "distribute");
  599. }
  600. /* Do not allow remove-brick if the volume is plain stripe */
  601. if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) &&
  602. (volinfo->brick_count == volinfo->stripe_count)) {
  603. snprintf (err_str, 2048,
  604. "Removing brick from a plain stripe is not allowed");
  605. gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
  606. ret = -1;
  607. goto out;
  608. }
  609. if (!replica_count &&
  610. (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) &&
  611. (volinfo->brick_count == volinfo->dist_leaf_count)) {
  612. snprintf (err_str, 2048,
  613. "Removing bricks from stripe-replicate"
  614. " configuration is not allowed without reducing "
  615. "replica or stripe count explicitly.");
  616. gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
  617. ret = -1;
  618. goto out;
  619. }
  620. if (!replica_count &&
  621. (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
  622. (volinfo->brick_count == volinfo->dist_leaf_count)) {
  623. snprintf (err_str, 2048,
  624. "Removing bricks from replicate configuration "
  625. "is not allowed without reducing replica count "
  626. "explicitly.");
  627. gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
  628. ret = -1;
  629. goto out;
  630. }
  631. /* Do not allow remove-brick if the bricks given is less than
  632. the replica count or stripe count */
  633. if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) {
  634. if (volinfo->dist_leaf_count &&
  635. (count % volinfo->dist_leaf_count)) {
  636. snprintf (err_str, 2048, "Remove brick incorrect"
  637. " brick count of %d for %s %d",
  638. count, vol_type, volinfo->dist_leaf_count);
  639. gf_log ("", GF_LOG_ERROR, "%s", err_str);
  640. ret = -1;
  641. goto out;
  642. }
  643. }
  644. brick_list = GF_MALLOC (120000 * sizeof(*brick_list),gf_common_mt_char);
  645. if (!brick_list) {
  646. ret = -1;
  647. goto out;
  648. }
  649. strcpy (brick_list, " ");
  650. while ( i <= count) {
  651. snprintf (key, 256, "brick%d", i);
  652. ret = dict_get_str (dict, key, &brick);
  653. if (ret) {
  654. gf_log ("", GF_LOG_ERROR, "Unable to get %s", key);
  655. goto out;
  656. }
  657. gf_log ("", GF_LOG_DEBUG, "Remove brick count %d brick: %s",
  658. i, brick);
  659. ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
  660. &brickinfo,
  661. GF_PATH_COMPLETE);
  662. if (ret) {
  663. snprintf(err_str, 2048,"Incorrect brick %s for volume"
  664. " %s", brick, volname);
  665. gf_log ("", GF_LOG_ERROR, "%s", err_str);
  666. goto out;
  667. }
  668. strcat(brick_list, brick);
  669. strcat(brick_list, " ");
  670. i++;
  671. if ((volinfo->type == GF_CLUSTER_TYPE_NONE) ||
  672. (volinfo->brick_count <= volinfo->dist_leaf_count))
  673. continue;
  674. if (replica_count) {
  675. /* do the validation of bricks here */
  676. /* -2 because i++ is already done, and i starts with 1,
  677. instead of 0 */
  678. diff_count = (volinfo->replica_count - replica_count);
  679. brick_index = (((i -2) / diff_count) * volinfo->replica_count);
  680. tmp_brick_idx = 0;
  681. found = 0;
  682. list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
  683. tmp_brick_idx++;
  684. gf_log (THIS->name, GF_LOG_TRACE,
  685. "validate brick %s:%s (%d %d %d)",
  686. tmp->hostname, tmp->path, tmp_brick_idx,
  687. brick_index, volinfo->replica_count);
  688. if (tmp_brick_idx <= brick_index)
  689. continue;
  690. if (tmp_brick_idx >
  691. (brick_index + volinfo->replica_count))
  692. break;
  693. if ((!strcmp (tmp->hostname,brickinfo->hostname)) &&
  694. !strcmp (tmp->path, brickinfo->path)) {
  695. found = 1;
  696. break;
  697. }
  698. }
  699. if (found)
  700. continue;
  701. snprintf(err_str, 2048,"Bricks are from same subvol");
  702. gf_log (THIS->name, GF_LOG_INFO,
  703. "failed to validate brick %s:%s (%d %d %d)",
  704. tmp->hostname, tmp->path, tmp_brick_idx,
  705. brick_index, volinfo->replica_count);
  706. ret = -1;
  707. /* brick order is not valid */
  708. goto out;
  709. }
  710. pos = 0;
  711. list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
  712. if (strcmp (tmp->hostname,brickinfo->hostname) ||
  713. strcmp (tmp->path, brickinfo->path)) {
  714. pos++;
  715. continue;
  716. }
  717. gf_log ("", GF_LOG_INFO, "Found brick");
  718. if (!sub_volume && (volinfo->dist_leaf_count > 1)) {
  719. sub_volume = (pos / volinfo->dist_leaf_count) + 1;
  720. sub_volume_start = (volinfo->dist_leaf_count *
  721. (sub_volume - 1));
  722. sub_volume_end = (volinfo->dist_leaf_count *
  723. sub_volume) - 1;
  724. } else {
  725. if (pos < sub_volume_start ||
  726. pos >sub_volume_end) {
  727. ret = -1;
  728. snprintf(err_str, 2048,"Bricks not from"
  729. " same subvol for %s",
  730. vol_type);
  731. gf_log ("", GF_LOG_ERROR,
  732. "%s", err_str);
  733. goto out;
  734. }
  735. }
  736. break;
  737. }
  738. }
  739. gf_cmd_log ("Volume remove-brick","volname: %s count:%d bricks:%s",
  740. volname, count, brick_list);
  741. ret = glusterd_op_begin (req, GD_OP_REMOVE_BRICK, dict);
  742. gf_cmd_log ("Volume remove-brick","on volname: %s %s", volname,
  743. (ret) ? "FAILED" : "SUCCESS");
  744. out:
  745. if (ret) {
  746. if (dict)
  747. dict_unref (dict);
  748. rsp.op_ret = -1;
  749. rsp.op_errno = 0;
  750. if (err_str[0] == '\0')
  751. snprintf (err_str, sizeof (err_str), "Operation failed");
  752. gf_log ("", GF_LOG_ERROR, "%s", err_str);
  753. rsp.op_errstr = err_str;
  754. cli_rsp = &rsp;
  755. glusterd_submit_reply(req, cli_rsp, NULL, 0, NULL,
  756. (xdrproc_t)xdr_gf_cli_rsp);
  757. ret = 0; //sent error to cli, prevent second reply
  758. }
  759. if (brick_list)
  760. GF_FREE (brick_list);
  761. if (cli_req.dict.dict_val)
  762. free (cli_req.dict.dict_val); //its malloced by xdr
  763. glusterd_friend_sm ();
  764. glusterd_op_sm ();
  765. return ret;
  766. }
  767. /* op-sm */
  768. int
  769. glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
  770. char *bricks, dict_t *dict)
  771. {
  772. glusterd_brickinfo_t *brickinfo = NULL;
  773. char *brick = NULL;
  774. int32_t i = 1;
  775. char *brick_list = NULL;
  776. char *free_ptr1 = NULL;
  777. char *free_ptr2 = NULL;
  778. char *saveptr = NULL;
  779. int32_t ret = -1;
  780. int32_t stripe_count = 0;
  781. int32_t replica_count = 0;
  782. int32_t type = 0;
  783. GF_ASSERT (volinfo);
  784. if (bricks) {
  785. brick_list = gf_strdup (bricks);
  786. free_ptr1 = brick_list;
  787. }
  788. if (count)
  789. brick = strtok_r (brick_list+1, " \n", &saveptr);
  790. if (dict) {
  791. ret = dict_get_int32 (dict, "stripe-count", &stripe_count);
  792. if (!ret)
  793. gf_log (THIS->name, GF_LOG_INFO,
  794. "stripe-count is set %d", stripe_count);
  795. ret = dict_get_int32 (dict, "replica-count", &replica_count);
  796. if (!ret)
  797. gf_log (THIS->name, GF_LOG_INFO,
  798. "replica-count is set %d", replica_count);
  799. ret = dict_get_int32 (dict, "type", &type);
  800. if (!ret)
  801. gf_log (THIS->name, GF_LOG_INFO,
  802. "type is set %d, need to change it", type);
  803. }
  804. while ( i <= count) {
  805. ret = glusterd_brickinfo_from_brick (brick, &brickinfo);
  806. if (ret)
  807. goto out;
  808. ret = glusterd_resolve_brick (brickinfo);
  809. if (ret)
  810. goto out;
  811. if (stripe_count || replica_count) {
  812. add_brick_at_right_order (brickinfo, volinfo, (i - 1),
  813. stripe_count, replica_count);
  814. } else {
  815. list_add_tail (&brickinfo->brick_list, &volinfo->bricks);
  816. }
  817. brick = strtok_r (NULL, " \n", &saveptr);
  818. i++;
  819. volinfo->brick_count++;
  820. }
  821. /* Gets changed only if the options are given in add-brick cli */
  822. if (type)
  823. volinfo->type = type;
  824. if (replica_count) {
  825. volinfo->replica_count = replica_count;
  826. }
  827. if (stripe_count) {
  828. volinfo->stripe_count = stripe_count;
  829. }
  830. volinfo->dist_leaf_count = (volinfo->stripe_count *
  831. volinfo->replica_count);
  832. /* backward compatibility */
  833. volinfo->sub_count = ((volinfo->dist_leaf_count == 1) ? 0:
  834. volinfo->dist_leaf_count);
  835. brick_list = gf_strdup (bricks);
  836. free_ptr2 = brick_list;
  837. i = 1;
  838. if (count)
  839. brick = strtok_r (brick_list+1, " \n", &saveptr);
  840. ret = glusterd_create_volfiles_and_notify_services (volinfo);
  841. if (ret)
  842. goto out;
  843. while (i <= count) {
  844. ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
  845. &brickinfo,
  846. GF_PATH_PARTIAL);
  847. if (ret)
  848. goto out;
  849. if (GLUSTERD_STATUS_STARTED == volinfo->status) {
  850. ret = glusterd_brick_start (volinfo, brickinfo);
  851. if (ret)
  852. goto out;
  853. }
  854. i++;
  855. brick = strtok_r (NULL, " \n", &saveptr);
  856. }
  857. out:
  858. if (free_ptr1)
  859. GF_FREE (free_ptr1);
  860. if (free_ptr2)
  861. GF_FREE (free_ptr2);
  862. gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
  863. return ret;
  864. }
  865. int
  866. glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick,
  867. int force, int *need_migrate)
  868. {
  869. glusterd_brickinfo_t *brickinfo = NULL;
  870. char *dup_brick = NULL;
  871. int32_t ret = -1;
  872. glusterd_conf_t *priv = NULL;
  873. GF_ASSERT (volinfo);
  874. GF_ASSERT (brick);
  875. priv = THIS->private;
  876. GF_ASSERT (priv);
  877. dup_brick = gf_strdup (brick);
  878. if (!dup_brick)
  879. goto out;
  880. ret = glusterd_volume_brickinfo_get_by_brick (dup_brick, volinfo,
  881. &brickinfo, GF_PATH_COMPLETE);
  882. if (ret)
  883. goto out;
  884. ret = glusterd_resolve_brick (brickinfo);
  885. if (ret)
  886. goto out;
  887. if (!uuid_compare (brickinfo->uuid, priv->uuid)) {
  888. /* Only if the brick is in this glusterd, do the rebalance */
  889. if (need_migrate)
  890. *need_migrate = 1;
  891. }
  892. if (force) {
  893. if (GLUSTERD_STATUS_STARTED == volinfo->status) {
  894. ret = glusterd_brick_stop (volinfo, brickinfo);
  895. if (ret) {
  896. gf_log (THIS->name, GF_LOG_ERROR, "Unable to stop "
  897. "glusterfs, ret: %d", ret);
  898. goto out;
  899. }
  900. }
  901. glusterd_delete_brick (volinfo, brickinfo);
  902. goto out;
  903. }
  904. brickinfo->decommissioned = 1;
  905. out:
  906. if (dup_brick)
  907. GF_FREE (dup_brick);
  908. gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
  909. return ret;
  910. }
  911. int
  912. glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr)
  913. {
  914. int ret = 0;
  915. char *volname = NULL;
  916. int count = 0;
  917. int i = 0;
  918. char *bricks = NULL;
  919. char *brick_list = NULL;
  920. char *saveptr = NULL;
  921. char *free_ptr = NULL;
  922. char *brick = NULL;
  923. glusterd_brickinfo_t *brickinfo = NULL;
  924. glusterd_volinfo_t *volinfo = NULL;
  925. glusterd_conf_t *priv = NULL;
  926. char msg[2048] = {0,};
  927. gf_boolean_t brick_alloc = _gf_false;
  928. char *all_bricks = NULL;
  929. char *str_ret = NULL;
  930. priv = THIS->private;
  931. if (!priv)
  932. goto out;
  933. ret = dict_get_str (dict, "volname", &volname);
  934. if (ret) {
  935. gf_log (THIS->name, GF_LOG_ERROR,
  936. "Unable to get volume name");
  937. goto out;
  938. }
  939. ret = glusterd_volinfo_find (volname, &volinfo);
  940. if (ret) {
  941. gf_log (THIS->name, GF_LOG_ERROR,
  942. "Unable to find volume: %s", volname);
  943. goto out;
  944. }
  945. ret = glusterd_validate_volume_id (dict, volinfo);
  946. if (ret)
  947. goto out;
  948. if (glusterd_is_rb_ongoing (volinfo)) {
  949. snprintf (msg, sizeof (msg), "Replace brick is in progress on "
  950. "volume %s. Please retry after replace-brick "
  951. "operation is committed or aborted", volname);
  952. gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
  953. *op_errstr = gf_strdup (msg);
  954. ret = -1;
  955. goto out;
  956. }
  957. if (glusterd_is_defrag_on(volinfo)) {
  958. snprintf (msg, sizeof(msg), "Volume name %s rebalance is in "
  959. "progress. Please retry after completion", volname);
  960. gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
  961. *op_errstr = gf_strdup (msg);
  962. ret = -1;
  963. goto out;
  964. }
  965. ret = dict_get_int32 (dict, "count", &count);
  966. if (ret) {
  967. gf_log ("", GF_LOG_ERROR, "Unable to get count");
  968. goto out;
  969. }
  970. ret = dict_get_str (dict, "bricks", &bricks);
  971. if (ret) {
  972. gf_log (THIS->name, GF_LOG_ERROR, "Unable to get bricks");
  973. goto out;
  974. }
  975. if (bricks) {
  976. brick_list = gf_strdup (bricks);
  977. all_bricks = gf_strdup (bricks);
  978. free_ptr = brick_list;
  979. }
  980. if (count)
  981. brick = strtok_r (brick_list+1, " \n", &saveptr);
  982. while ( i < count) {
  983. if (!glusterd_store_is_valid_brickpath (volname, brick) ||
  984. !glusterd_is_valid_volfpath (volname, brick)) {
  985. snprintf (msg, sizeof (msg), "brick path %s is "
  986. "too long", brick);
  987. gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
  988. *op_errstr = gf_strdup (msg);
  989. ret = -1;
  990. goto out;
  991. }
  992. ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
  993. &brickinfo,
  994. GF_PATH_PARTIAL);
  995. if (!ret) {
  996. gf_log (THIS->name, GF_LOG_ERROR,
  997. "Adding duplicate brick: %s", brick);
  998. ret = -1;
  999. goto out;
  1000. } else {
  1001. ret = glusterd_brickinfo_from_brick (brick, &brickinfo);
  1002. if (ret) {
  1003. gf_log (THIS->name, GF_LOG_ERROR,
  1004. "Add-brick: Unable"
  1005. " to get brickinfo");
  1006. goto out;
  1007. }
  1008. brick_alloc = _gf_true;
  1009. }
  1010. ret = glusterd_new_brick_validate (brick, brickinfo, msg,
  1011. sizeof (msg));
  1012. if (ret) {
  1013. *op_errstr = gf_strdup (msg);
  1014. ret = -1;
  1015. goto out;
  1016. }
  1017. if (!uuid_compare (brickinfo->uuid, priv->uuid)) {
  1018. ret = glusterd_brick_create_path (brickinfo->hostname,
  1019. brickinfo->path,
  1020. volinfo->volume_id,
  1021. op_errstr);
  1022. if (ret)
  1023. goto out;
  1024. }
  1025. glusterd_brickinfo_delete (brickinfo);
  1026. brick_alloc = _gf_false;
  1027. brickinfo = NULL;
  1028. brick = strtok_r (NULL, " \n", &saveptr);
  1029. i++;
  1030. }
  1031. out:
  1032. if (free_ptr)
  1033. GF_FREE (free_ptr);
  1034. if (brick_alloc && brickinfo)
  1035. glusterd_brickinfo_delete (brickinfo);
  1036. if (str_ret)
  1037. GF_FREE (str_ret);
  1038. if (all_bricks)
  1039. GF_FREE (all_bricks);
  1040. gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
  1041. return ret;
  1042. }
  1043. int
  1044. glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
  1045. {
  1046. int ret = -1;
  1047. char *volname = NULL;
  1048. glusterd_volinfo_t *volinfo = NULL;
  1049. char *errstr = NULL;
  1050. int32_t brick_count = 0;
  1051. char msg[2048] = {0,};
  1052. int32_t flag = 0;
  1053. gf1_op_commands cmd = GF_OP_CMD_NONE;
  1054. ret = dict_get_str (dict, "volname", &volname);
  1055. if (ret) {
  1056. gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
  1057. goto out;
  1058. }
  1059. ret = glusterd_volinfo_find (volname, &volinfo);
  1060. if (ret) {
  1061. gf_log ("", GF_LOG_ERROR, "Volume %s does not exist", volname);
  1062. goto out;
  1063. }
  1064. ret = glusterd_validate_volume_id (dict, volinfo);
  1065. if (ret)
  1066. goto out;
  1067. if (glusterd_is_rb_ongoing (volinfo)) {
  1068. snprintf (msg, sizeof (msg), "Replace brick is in progress on "
  1069. "volume %s. Please retry after replace-brick "
  1070. "operation is committed or aborted", volname);
  1071. gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
  1072. *op_errstr = gf_strdup (msg);
  1073. ret = -1;
  1074. goto out;
  1075. }
  1076. ret = dict_get_int32 (dict, "command"