PageRenderTime 67ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 1ms

/source3/lib/dbwrap/dbwrap_ctdb.c

https://bitbucket.org/mikedep333/rdssamba4
C | 1493 lines | 1071 code | 246 blank | 176 comment | 191 complexity | 099523fd3d79e87f40ef7325e959be12 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, GPL-3.0, LGPL-2.1, LGPL-3.0
  1. /*
  2. Unix SMB/CIFS implementation.
  3. Database interface wrapper around ctdbd
  4. Copyright (C) Volker Lendecke 2007-2009
  5. Copyright (C) Michael Adam 2009
  6. This program is free software; you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation; either version 3 of the License, or
  9. (at your option) any later version.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #include "includes.h"
  18. #include "system/filesys.h"
  19. #include "lib/util/tdb_wrap.h"
  20. #include "util_tdb.h"
  21. #ifdef CLUSTER_SUPPORT
  22. /*
  23. * It is not possible to include ctdb.h and tdb_compat.h (included via
  24. * some other include above) without warnings. This fixes those
  25. * warnings.
  26. */
  27. #ifdef typesafe_cb
  28. #undef typesafe_cb
  29. #endif
  30. #ifdef typesafe_cb_preargs
  31. #undef typesafe_cb_preargs
  32. #endif
  33. #ifdef typesafe_cb_postargs
  34. #undef typesafe_cb_postargs
  35. #endif
  36. #include "ctdb.h"
  37. #include "ctdb_private.h"
  38. #include "ctdbd_conn.h"
  39. #include "dbwrap/dbwrap.h"
  40. #include "dbwrap/dbwrap_private.h"
  41. #include "dbwrap/dbwrap_ctdb.h"
  42. #include "g_lock.h"
  43. #include "messages.h"
  /*
   * State of one open transaction on a ctdb-backed database.
   * Allocated in db_ctdb_transaction_start() and released on
   * commit or cancel of the outermost transaction level.
   */
  44. struct db_ctdb_transaction_handle {
  /* per-database state this transaction belongs to */
  45. struct db_ctdb_ctx *ctx;
  46. /*
  47. * we store the reads and writes done under a transaction:
  48. * - one list stores both reads and writes (m_all),
  49. * - the other just writes (m_write)
  50. */
  51. struct ctdb_marshall_buffer *m_all;
  52. struct ctdb_marshall_buffer *m_write;
  /* number of nested transaction_start calls on top of the outermost one */
  53. uint32_t nesting;
  /* set when a nested level was cancelled; a later commit then fails */
  54. bool nested_cancel;
  /* name of the g_lock ("transaction_db_0x<db_id>") held while open */
  55. char *lock_name;
  56. };
  /*
   * Private state of a ctdb-backed dbwrap database; reached through
   * db_context->private_data.
   */
  57. struct db_ctdb_ctx {
  /* dbwrap context used to call transaction_commit/transaction_cancel */
  58. struct db_context *db;
  /* local tdb copy; each record is a ctdb_ltdb_header followed by the payload */
  59. struct tdb_wrap *wtdb;
  /* database id passed to ctdbd controls and used in log messages */
  60. uint32 db_id;
  /* currently open transaction, or NULL when none is active */
  61. struct db_ctdb_transaction_handle *transaction;
  /* g_lock context used to take the per-database transaction lock */
  62. struct g_lock_ctx *lock_ctx;
  63. };
  /*
   * Private data of a db_record returned by fetch_locked_internal().
   */
  64. struct db_ctdb_rec {
  /* database the locked record lives in */
  65. struct db_ctdb_ctx *ctdb_ctx;
  /* copy of the record's ltdb header; written back unchanged on store */
  66. struct ctdb_ltdb_header header;
  /* time the chain lock was obtained; used to warn about long lock holds */
  67. struct timeval lock_time;
  68. };
  69. static NTSTATUS tdb_error_to_ntstatus(struct tdb_context *tdb)
  70. {
  71. NTSTATUS status;
  72. enum TDB_ERROR tret = tdb_error(tdb);
  73. switch (tret) {
  74. case TDB_ERR_EXISTS:
  75. status = NT_STATUS_OBJECT_NAME_COLLISION;
  76. break;
  77. case TDB_ERR_NOEXIST:
  78. status = NT_STATUS_OBJECT_NAME_NOT_FOUND;
  79. break;
  80. default:
  81. status = NT_STATUS_INTERNAL_DB_CORRUPTION;
  82. break;
  83. }
  84. return status;
  85. }
  86. /**
  87. * fetch a record from the tdb, separating out the header
  88. * information and returning the body of the record.
  89. */
  90. static NTSTATUS db_ctdb_ltdb_fetch(struct db_ctdb_ctx *db,
  91. TDB_DATA key,
  92. struct ctdb_ltdb_header *header,
  93. TALLOC_CTX *mem_ctx,
  94. TDB_DATA *data)
  95. {
  96. TDB_DATA rec;
  97. NTSTATUS status;
  98. rec = tdb_fetch_compat(db->wtdb->tdb, key);
  99. if (rec.dsize < sizeof(struct ctdb_ltdb_header)) {
  100. status = NT_STATUS_NOT_FOUND;
  101. if (data) {
  102. ZERO_STRUCTP(data);
  103. }
  104. if (header) {
  105. header->dmaster = (uint32_t)-1;
  106. header->rsn = 0;
  107. }
  108. goto done;
  109. }
  110. if (header) {
  111. *header = *(struct ctdb_ltdb_header *)rec.dptr;
  112. }
  113. if (data) {
  114. data->dsize = rec.dsize - sizeof(struct ctdb_ltdb_header);
  115. if (data->dsize == 0) {
  116. data->dptr = NULL;
  117. } else {
  118. data->dptr = (unsigned char *)talloc_memdup(mem_ctx,
  119. rec.dptr
  120. + sizeof(struct ctdb_ltdb_header),
  121. data->dsize);
  122. if (data->dptr == NULL) {
  123. status = NT_STATUS_NO_MEMORY;
  124. goto done;
  125. }
  126. }
  127. }
  128. status = NT_STATUS_OK;
  129. done:
  130. SAFE_FREE(rec.dptr);
  131. return status;
  132. }
  133. /*
  134. * Store a record together with the ctdb record header
  135. * in the local copy of the database.
  136. */
  137. static NTSTATUS db_ctdb_ltdb_store(struct db_ctdb_ctx *db,
  138. TDB_DATA key,
  139. struct ctdb_ltdb_header *header,
  140. TDB_DATA data)
  141. {
  142. TALLOC_CTX *tmp_ctx = talloc_stackframe();
  143. TDB_DATA rec;
  144. int ret;
  145. rec.dsize = data.dsize + sizeof(struct ctdb_ltdb_header);
  146. rec.dptr = (uint8_t *)talloc_size(tmp_ctx, rec.dsize);
  147. if (rec.dptr == NULL) {
  148. talloc_free(tmp_ctx);
  149. return NT_STATUS_NO_MEMORY;
  150. }
  151. memcpy(rec.dptr, header, sizeof(struct ctdb_ltdb_header));
  152. memcpy(sizeof(struct ctdb_ltdb_header) + (uint8_t *)rec.dptr, data.dptr, data.dsize);
  153. ret = tdb_store(db->wtdb->tdb, key, rec, TDB_REPLACE);
  154. talloc_free(tmp_ctx);
  155. return (ret == 0) ? NT_STATUS_OK
  156. : tdb_error_to_ntstatus(db->wtdb->tdb);
  157. }
  158. /*
  159. form a ctdb_rec_data record from a key/data pair
  160. note that header may be NULL. If not NULL then it is included in the data portion
  161. of the record
  162. */
  163. static struct ctdb_rec_data *db_ctdb_marshall_record(TALLOC_CTX *mem_ctx, uint32_t reqid,
  164. TDB_DATA key,
  165. struct ctdb_ltdb_header *header,
  166. TDB_DATA data)
  167. {
  168. size_t length;
  169. struct ctdb_rec_data *d;
  170. length = offsetof(struct ctdb_rec_data, data) + key.dsize +
  171. data.dsize + (header?sizeof(*header):0);
  172. d = (struct ctdb_rec_data *)talloc_size(mem_ctx, length);
  173. if (d == NULL) {
  174. return NULL;
  175. }
  176. d->length = length;
  177. d->reqid = reqid;
  178. d->keylen = key.dsize;
  179. memcpy(&d->data[0], key.dptr, key.dsize);
  180. if (header) {
  181. d->datalen = data.dsize + sizeof(*header);
  182. memcpy(&d->data[key.dsize], header, sizeof(*header));
  183. memcpy(&d->data[key.dsize+sizeof(*header)], data.dptr, data.dsize);
  184. } else {
  185. d->datalen = data.dsize;
  186. memcpy(&d->data[key.dsize], data.dptr, data.dsize);
  187. }
  188. return d;
  189. }
  190. /* helper function for marshalling multiple records */
  191. static struct ctdb_marshall_buffer *db_ctdb_marshall_add(TALLOC_CTX *mem_ctx,
  192. struct ctdb_marshall_buffer *m,
  193. uint64_t db_id,
  194. uint32_t reqid,
  195. TDB_DATA key,
  196. struct ctdb_ltdb_header *header,
  197. TDB_DATA data)
  198. {
  199. struct ctdb_rec_data *r;
  200. size_t m_size, r_size;
  201. struct ctdb_marshall_buffer *m2 = NULL;
  202. r = db_ctdb_marshall_record(talloc_tos(), reqid, key, header, data);
  203. if (r == NULL) {
  204. talloc_free(m);
  205. return NULL;
  206. }
  207. if (m == NULL) {
  208. m = (struct ctdb_marshall_buffer *)talloc_zero_size(
  209. mem_ctx, offsetof(struct ctdb_marshall_buffer, data));
  210. if (m == NULL) {
  211. goto done;
  212. }
  213. m->db_id = db_id;
  214. }
  215. m_size = talloc_get_size(m);
  216. r_size = talloc_get_size(r);
  217. m2 = (struct ctdb_marshall_buffer *)talloc_realloc_size(
  218. mem_ctx, m, m_size + r_size);
  219. if (m2 == NULL) {
  220. talloc_free(m);
  221. goto done;
  222. }
  223. memcpy(m_size + (uint8_t *)m2, r, r_size);
  224. m2->count++;
  225. done:
  226. talloc_free(r);
  227. return m2;
  228. }
  229. /* we've finished marshalling, return a data blob with the marshalled records */
  230. static TDB_DATA db_ctdb_marshall_finish(struct ctdb_marshall_buffer *m)
  231. {
  232. TDB_DATA data;
  233. data.dptr = (uint8_t *)m;
  234. data.dsize = talloc_get_size(m);
  235. return data;
  236. }
  237. /*
  238. loop over a marshalling buffer
  239. - pass r==NULL to start
  240. - loop the number of times indicated by m->count
  241. */
  242. static struct ctdb_rec_data *db_ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, struct ctdb_rec_data *r,
  243. uint32_t *reqid,
  244. struct ctdb_ltdb_header *header,
  245. TDB_DATA *key, TDB_DATA *data)
  246. {
  247. if (r == NULL) {
  248. r = (struct ctdb_rec_data *)&m->data[0];
  249. } else {
  250. r = (struct ctdb_rec_data *)(r->length + (uint8_t *)r);
  251. }
  252. if (reqid != NULL) {
  253. *reqid = r->reqid;
  254. }
  255. if (key != NULL) {
  256. key->dptr = &r->data[0];
  257. key->dsize = r->keylen;
  258. }
  259. if (data != NULL) {
  260. data->dptr = &r->data[r->keylen];
  261. data->dsize = r->datalen;
  262. if (header != NULL) {
  263. data->dptr += sizeof(*header);
  264. data->dsize -= sizeof(*header);
  265. }
  266. }
  267. if (header != NULL) {
  268. if (r->datalen < sizeof(*header)) {
  269. return NULL;
  270. }
  271. *header = *(struct ctdb_ltdb_header *)&r->data[r->keylen];
  272. }
  273. return r;
  274. }
  275. /**
  276. * CTDB transaction destructor
  277. */
  278. static int db_ctdb_transaction_destructor(struct db_ctdb_transaction_handle *h)
  279. {
  280. NTSTATUS status;
  281. status = g_lock_unlock(h->ctx->lock_ctx, h->lock_name);
  282. if (!NT_STATUS_IS_OK(status)) {
  283. DEBUG(0, ("g_lock_unlock failed for %s: %s\n", h->lock_name,
  284. nt_errstr(status)));
  285. return -1;
  286. }
  287. return 0;
  288. }
  289. /**
  290. * CTDB dbwrap API: transaction_start function
  291. * starts a transaction on a persistent database
  292. */
  293. static int db_ctdb_transaction_start(struct db_context *db)
  294. {
  295. struct db_ctdb_transaction_handle *h;
  296. NTSTATUS status;
  297. struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data,
  298. struct db_ctdb_ctx);
  299. if (!db->persistent) {
  300. DEBUG(0,("transactions not supported on non-persistent database 0x%08x\n",
  301. ctx->db_id));
  302. return -1;
  303. }
  304. if (ctx->transaction) {
  305. ctx->transaction->nesting++;
  306. DEBUG(5, (__location__ " transaction start on db 0x%08x: nesting %d -> %d\n",
  307. ctx->db_id, ctx->transaction->nesting - 1, ctx->transaction->nesting));
  308. return 0;
  309. }
  310. h = talloc_zero(db, struct db_ctdb_transaction_handle);
  311. if (h == NULL) {
  312. DEBUG(0,(__location__ " oom for transaction handle\n"));
  313. return -1;
  314. }
  315. h->ctx = ctx;
  316. h->lock_name = talloc_asprintf(h, "transaction_db_0x%08x",
  317. (unsigned int)ctx->db_id);
  318. if (h->lock_name == NULL) {
  319. DEBUG(0, ("talloc_asprintf failed\n"));
  320. TALLOC_FREE(h);
  321. return -1;
  322. }
  323. /*
  324. * Wait a day, i.e. forever...
  325. */
  326. status = g_lock_lock(ctx->lock_ctx, h->lock_name, G_LOCK_WRITE,
  327. timeval_set(86400, 0));
  328. if (!NT_STATUS_IS_OK(status)) {
  329. DEBUG(0, ("g_lock_lock failed: %s\n", nt_errstr(status)));
  330. TALLOC_FREE(h);
  331. return -1;
  332. }
  333. talloc_set_destructor(h, db_ctdb_transaction_destructor);
  334. ctx->transaction = h;
  335. DEBUG(5,(__location__ " transaction started on db 0x%08x\n", ctx->db_id));
  336. return 0;
  337. }
  338. static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf,
  339. TDB_DATA key,
  340. struct ctdb_ltdb_header *pheader,
  341. TALLOC_CTX *mem_ctx,
  342. TDB_DATA *pdata)
  343. {
  344. struct ctdb_rec_data *rec = NULL;
  345. struct ctdb_ltdb_header h;
  346. bool found = false;
  347. TDB_DATA data;
  348. int i;
  349. if (buf == NULL) {
  350. return false;
  351. }
  352. ZERO_STRUCT(h);
  353. ZERO_STRUCT(data);
  354. /*
  355. * Walk the list of records written during this
  356. * transaction. If we want to read one we have already
  357. * written, return the last written sample. Thus we do not do
  358. * a "break;" for the first hit, this record might have been
  359. * overwritten later.
  360. */
  361. for (i=0; i<buf->count; i++) {
  362. TDB_DATA tkey, tdata;
  363. uint32_t reqid;
  364. struct ctdb_ltdb_header hdr;
  365. ZERO_STRUCT(hdr);
  366. rec = db_ctdb_marshall_loop_next(buf, rec, &reqid, &hdr, &tkey,
  367. &tdata);
  368. if (rec == NULL) {
  369. return false;
  370. }
  371. if (tdb_data_equal(key, tkey)) {
  372. found = true;
  373. data = tdata;
  374. h = hdr;
  375. }
  376. }
  377. if (!found) {
  378. return false;
  379. }
  380. if (pdata != NULL) {
  381. data.dptr = (uint8_t *)talloc_memdup(mem_ctx, data.dptr,
  382. data.dsize);
  383. if ((data.dsize != 0) && (data.dptr == NULL)) {
  384. return false;
  385. }
  386. *pdata = data;
  387. }
  388. if (pheader != NULL) {
  389. *pheader = h;
  390. }
  391. return true;
  392. }
  393. /*
  394. fetch a record inside a transaction
  395. */
  396. static int db_ctdb_transaction_fetch(struct db_ctdb_ctx *db,
  397. TALLOC_CTX *mem_ctx,
  398. TDB_DATA key, TDB_DATA *data)
  399. {
  400. struct db_ctdb_transaction_handle *h = db->transaction;
  401. NTSTATUS status;
  402. bool found;
  403. found = pull_newest_from_marshall_buffer(h->m_write, key, NULL,
  404. mem_ctx, data);
  405. if (found) {
  406. return 0;
  407. }
  408. status = db_ctdb_ltdb_fetch(h->ctx, key, NULL, mem_ctx, data);
  409. if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
  410. *data = tdb_null;
  411. } else if (!NT_STATUS_IS_OK(status)) {
  412. return -1;
  413. }
  414. h->m_all = db_ctdb_marshall_add(h, h->m_all, h->ctx->db_id, 1, key,
  415. NULL, *data);
  416. if (h->m_all == NULL) {
  417. DEBUG(0,(__location__ " Failed to add to marshalling "
  418. "record\n"));
  419. data->dsize = 0;
  420. talloc_free(data->dptr);
  421. return -1;
  422. }
  423. return 0;
  424. }
  425. /**
  426. * Fetch a record from a persistent database
  427. * without record locking and without an active transaction.
  428. *
  429. * This just fetches from the local database copy.
  430. * Since the databases are kept in syc cluster-wide,
  431. * there is no point in doing a ctdb call to fetch the
  432. * record from the lmaster. It does even harm since migration
  433. * of records bump their RSN and hence render the persistent
  434. * database inconsistent.
  435. */
  436. static int db_ctdb_fetch_persistent(struct db_ctdb_ctx *db,
  437. TALLOC_CTX *mem_ctx,
  438. TDB_DATA key, TDB_DATA *data)
  439. {
  440. NTSTATUS status;
  441. status = db_ctdb_ltdb_fetch(db, key, NULL, mem_ctx, data);
  442. if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
  443. *data = tdb_null;
  444. } else if (!NT_STATUS_IS_OK(status)) {
  445. return -1;
  446. }
  447. return 0;
  448. }
  449. static NTSTATUS db_ctdb_store_transaction(struct db_record *rec, TDB_DATA data, int flag);
  450. static NTSTATUS db_ctdb_delete_transaction(struct db_record *rec);
  451. static struct db_record *db_ctdb_fetch_locked_transaction(struct db_ctdb_ctx *ctx,
  452. TALLOC_CTX *mem_ctx,
  453. TDB_DATA key)
  454. {
  455. struct db_record *result;
  456. TDB_DATA ctdb_data;
  457. if (!(result = talloc(mem_ctx, struct db_record))) {
  458. DEBUG(0, ("talloc failed\n"));
  459. return NULL;
  460. }
  461. result->private_data = ctx->transaction;
  462. result->key.dsize = key.dsize;
  463. result->key.dptr = (uint8 *)talloc_memdup(result, key.dptr, key.dsize);
  464. if (result->key.dptr == NULL) {
  465. DEBUG(0, ("talloc failed\n"));
  466. TALLOC_FREE(result);
  467. return NULL;
  468. }
  469. result->store = db_ctdb_store_transaction;
  470. result->delete_rec = db_ctdb_delete_transaction;
  471. if (pull_newest_from_marshall_buffer(ctx->transaction->m_write, key,
  472. NULL, result, &result->value)) {
  473. return result;
  474. }
  475. ctdb_data = tdb_fetch_compat(ctx->wtdb->tdb, key);
  476. if (ctdb_data.dptr == NULL) {
  477. /* create the record */
  478. result->value = tdb_null;
  479. return result;
  480. }
  481. result->value.dsize = ctdb_data.dsize - sizeof(struct ctdb_ltdb_header);
  482. result->value.dptr = NULL;
  483. if ((result->value.dsize != 0)
  484. && !(result->value.dptr = (uint8 *)talloc_memdup(
  485. result, ctdb_data.dptr + sizeof(struct ctdb_ltdb_header),
  486. result->value.dsize))) {
  487. DEBUG(0, ("talloc failed\n"));
  488. TALLOC_FREE(result);
  489. }
  490. SAFE_FREE(ctdb_data.dptr);
  491. return result;
  492. }
  493. static int db_ctdb_record_destructor(struct db_record **recp)
  494. {
  495. struct db_record *rec = talloc_get_type_abort(*recp, struct db_record);
  496. struct db_ctdb_transaction_handle *h = talloc_get_type_abort(
  497. rec->private_data, struct db_ctdb_transaction_handle);
  498. int ret = h->ctx->db->transaction_commit(h->ctx->db);
  499. if (ret != 0) {
  500. DEBUG(0,(__location__ " transaction_commit failed\n"));
  501. }
  502. return 0;
  503. }
  504. /*
  505. auto-create a transaction for persistent databases
  506. */
  507. static struct db_record *db_ctdb_fetch_locked_persistent(struct db_ctdb_ctx *ctx,
  508. TALLOC_CTX *mem_ctx,
  509. TDB_DATA key)
  510. {
  511. int res;
  512. struct db_record *rec, **recp;
  513. res = db_ctdb_transaction_start(ctx->db);
  514. if (res == -1) {
  515. return NULL;
  516. }
  517. rec = db_ctdb_fetch_locked_transaction(ctx, mem_ctx, key);
  518. if (rec == NULL) {
  519. ctx->db->transaction_cancel(ctx->db);
  520. return NULL;
  521. }
  522. /* destroy this transaction when we release the lock */
  523. recp = talloc(rec, struct db_record *);
  524. if (recp == NULL) {
  525. ctx->db->transaction_cancel(ctx->db);
  526. talloc_free(rec);
  527. return NULL;
  528. }
  529. *recp = rec;
  530. talloc_set_destructor(recp, db_ctdb_record_destructor);
  531. return rec;
  532. }
  533. /*
  534. stores a record inside a transaction
  535. */
  536. static NTSTATUS db_ctdb_transaction_store(struct db_ctdb_transaction_handle *h,
  537. TDB_DATA key, TDB_DATA data)
  538. {
  539. TALLOC_CTX *tmp_ctx = talloc_new(h);
  540. TDB_DATA rec;
  541. struct ctdb_ltdb_header header;
  542. ZERO_STRUCT(header);
  543. /* we need the header so we can update the RSN */
  544. if (!pull_newest_from_marshall_buffer(h->m_write, key, &header,
  545. NULL, NULL)) {
  546. rec = tdb_fetch_compat(h->ctx->wtdb->tdb, key);
  547. if (rec.dptr != NULL) {
  548. memcpy(&header, rec.dptr,
  549. sizeof(struct ctdb_ltdb_header));
  550. rec.dsize -= sizeof(struct ctdb_ltdb_header);
  551. /*
  552. * a special case, we are writing the same
  553. * data that is there now
  554. */
  555. if (data.dsize == rec.dsize &&
  556. memcmp(data.dptr,
  557. rec.dptr + sizeof(struct ctdb_ltdb_header),
  558. data.dsize) == 0) {
  559. SAFE_FREE(rec.dptr);
  560. talloc_free(tmp_ctx);
  561. return NT_STATUS_OK;
  562. }
  563. }
  564. SAFE_FREE(rec.dptr);
  565. }
  566. header.dmaster = get_my_vnn();
  567. header.rsn++;
  568. h->m_all = db_ctdb_marshall_add(h, h->m_all, h->ctx->db_id, 0, key,
  569. NULL, data);
  570. if (h->m_all == NULL) {
  571. DEBUG(0,(__location__ " Failed to add to marshalling "
  572. "record\n"));
  573. talloc_free(tmp_ctx);
  574. return NT_STATUS_NO_MEMORY;
  575. }
  576. h->m_write = db_ctdb_marshall_add(h, h->m_write, h->ctx->db_id, 0, key, &header, data);
  577. if (h->m_write == NULL) {
  578. DEBUG(0,(__location__ " Failed to add to marshalling record\n"));
  579. talloc_free(tmp_ctx);
  580. return NT_STATUS_NO_MEMORY;
  581. }
  582. talloc_free(tmp_ctx);
  583. return NT_STATUS_OK;
  584. }
  585. /*
  586. a record store inside a transaction
  587. */
  588. static NTSTATUS db_ctdb_store_transaction(struct db_record *rec, TDB_DATA data, int flag)
  589. {
  590. struct db_ctdb_transaction_handle *h = talloc_get_type_abort(
  591. rec->private_data, struct db_ctdb_transaction_handle);
  592. NTSTATUS status;
  593. status = db_ctdb_transaction_store(h, rec->key, data);
  594. return status;
  595. }
  596. /*
  597. a record delete inside a transaction
  598. */
  599. static NTSTATUS db_ctdb_delete_transaction(struct db_record *rec)
  600. {
  601. struct db_ctdb_transaction_handle *h = talloc_get_type_abort(
  602. rec->private_data, struct db_ctdb_transaction_handle);
  603. NTSTATUS status;
  604. status = db_ctdb_transaction_store(h, rec->key, tdb_null);
  605. return status;
  606. }
  607. /**
  608. * Fetch the db sequence number of a persistent db directly from the db.
  609. */
  610. static NTSTATUS db_ctdb_fetch_db_seqnum_from_db(struct db_ctdb_ctx *db,
  611. uint64_t *seqnum)
  612. {
  613. NTSTATUS status;
  614. const char *keyname = CTDB_DB_SEQNUM_KEY;
  615. TDB_DATA key;
  616. TDB_DATA data;
  617. struct ctdb_ltdb_header header;
  618. TALLOC_CTX *mem_ctx = talloc_stackframe();
  619. if (seqnum == NULL) {
  620. return NT_STATUS_INVALID_PARAMETER;
  621. }
  622. key = string_term_tdb_data(keyname);
  623. status = db_ctdb_ltdb_fetch(db, key, &header, mem_ctx, &data);
  624. if (!NT_STATUS_IS_OK(status) &&
  625. !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND))
  626. {
  627. goto done;
  628. }
  629. status = NT_STATUS_OK;
  630. if (data.dsize != sizeof(uint64_t)) {
  631. *seqnum = 0;
  632. goto done;
  633. }
  634. *seqnum = *(uint64_t *)data.dptr;
  635. done:
  636. TALLOC_FREE(mem_ctx);
  637. return status;
  638. }
  639. /**
  640. * Store the database sequence number inside a transaction.
  641. */
  642. static NTSTATUS db_ctdb_store_db_seqnum(struct db_ctdb_transaction_handle *h,
  643. uint64_t seqnum)
  644. {
  645. NTSTATUS status;
  646. const char *keyname = CTDB_DB_SEQNUM_KEY;
  647. TDB_DATA key;
  648. TDB_DATA data;
  649. key = string_term_tdb_data(keyname);
  650. data.dptr = (uint8_t *)&seqnum;
  651. data.dsize = sizeof(uint64_t);
  652. status = db_ctdb_transaction_store(h, key, data);
  653. return status;
  654. }
  655. /*
  656. commit a transaction
   *
   * Returns 0 on success and -1 on failure.
   *
   * - With no open transaction: fails.
   * - After a nested cancel: cancels instead of committing and fails.
   * - At a nested level (nesting != 0): only decrements the nesting
   *   counter; nothing is written yet.
   * - At the outermost level: bumps the db sequence number, sends the
   *   collected writes (m_write) to ctdbd with TRANS3_COMMIT, and
   *   frees the transaction handle whether or not this succeeded.
  657. */
  658. static int db_ctdb_transaction_commit(struct db_context *db)
  659. {
  660. struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data,
  661. struct db_ctdb_ctx);
  662. NTSTATUS rets;
  663. int status;
  664. struct db_ctdb_transaction_handle *h = ctx->transaction;
  665. uint64_t old_seqnum, new_seqnum;
  666. int ret;
  667. if (h == NULL) {
  668. DEBUG(0,(__location__ " transaction commit with no open transaction on db 0x%08x\n", ctx->db_id));
  669. return -1;
  670. }
  /* a nested level was cancelled earlier: the transaction cannot be committed */
  671. if (h->nested_cancel) {
  672. db->transaction_cancel(db);
  673. DEBUG(5,(__location__ " Failed transaction commit after nested cancel\n"));
  674. return -1;
  675. }
  /* committing a nested level only unwinds the nesting counter */
  676. if (h->nesting != 0) {
  677. h->nesting--;
  678. DEBUG(5, (__location__ " transaction commit on db 0x%08x: nesting %d -> %d\n",
  679. ctx->db_id, ctx->transaction->nesting + 1, ctx->transaction->nesting));
  680. return 0;
  681. }
  682. if (h->m_write == NULL) {
  683. /*
  684. * No changes were made, so don't change the seqnum,
  685. * don't push to other node, just exit with success.
  686. */
  687. ret = 0;
  688. goto done;
  689. }
  690. DEBUG(5,(__location__ " transaction commit on db 0x%08x\n", ctx->db_id));
  691. /*
  692. * As the last db action before committing, bump the database sequence
  693. * number. Note that this undoes all changes to the seqnum records
  694. * performed under the transaction. This record is not meant to be
  695. * modified by user interaction. It is for internal use only...
  696. */
  697. rets = db_ctdb_fetch_db_seqnum_from_db(ctx, &old_seqnum);
  698. if (!NT_STATUS_IS_OK(rets)) {
  699. DEBUG(1, (__location__ " failed to fetch the db sequence number "
  700. "in transaction commit on db 0x%08x\n", ctx->db_id));
  701. ret = -1;
  702. goto done;
  703. }
  704. new_seqnum = old_seqnum + 1;
  705. rets = db_ctdb_store_db_seqnum(h, new_seqnum);
  706. if (!NT_STATUS_IS_OK(rets)) {
  /* NOTE(review): this message lacks the space after __location__ that the others have */
  707. DEBUG(1, (__location__ "failed to store the db sequence number "
  708. " in transaction commit on db 0x%08x\n", ctx->db_id));
  709. ret = -1;
  710. goto done;
  711. }
  /* NOTE(review): the retry via "goto again" below has no upper bound */
  712. again:
  713. /* tell ctdbd to commit to the other nodes */
  714. rets = ctdbd_control_local(messaging_ctdbd_connection(),
  715. CTDB_CONTROL_TRANS3_COMMIT,
  716. h->ctx->db_id, 0,
  717. db_ctdb_marshall_finish(h->m_write),
  718. NULL, NULL, &status);
  719. if (!NT_STATUS_IS_OK(rets) || status != 0) {
  720. /*
  721. * The TRANS3_COMMIT control should only possibly fail when a
  722. * recovery has been running concurrently. In any case, the db
  723. * will be the same on all nodes, either the new copy or the
  724. * old copy. This can be detected by comparing the old and new
  725. * local sequence numbers.
  726. */
  727. rets = db_ctdb_fetch_db_seqnum_from_db(ctx, &new_seqnum);
  728. if (!NT_STATUS_IS_OK(rets)) {
  729. DEBUG(1, (__location__ " failed to refetch db sequence "
  730. "number after failed TRANS3_COMMIT\n"));
  731. ret = -1;
  732. goto done;
  733. }
  734. if (new_seqnum == old_seqnum) {
  735. /* Recovery prevented all our changes: retry. */
  736. goto again;
  737. } else if (new_seqnum != (old_seqnum + 1)) {
  738. DEBUG(0, (__location__ " ERROR: new_seqnum[%lu] != "
  739. "old_seqnum[%lu] + (0 or 1) after failed "
  740. "TRANS3_COMMIT - this should not happen!\n",
  741. (unsigned long)new_seqnum,
  742. (unsigned long)old_seqnum));
  743. ret = -1;
  744. goto done;
  745. }
  746. /*
  747. * Recovery propagated our changes to all nodes, completing
  748. * our commit for us - succeed.
  749. */
  750. }
  751. ret = 0;
  752. done:
  /* the transaction is over either way: detach and free the handle */
  753. h->ctx->transaction = NULL;
  754. talloc_free(h);
  755. return ret;
  756. }
  757. /*
  758. cancel a transaction
  759. */
  760. static int db_ctdb_transaction_cancel(struct db_context *db)
  761. {
  762. struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data,
  763. struct db_ctdb_ctx);
  764. struct db_ctdb_transaction_handle *h = ctx->transaction;
  765. if (h == NULL) {
  766. DEBUG(0,(__location__ " transaction cancel with no open transaction on db 0x%08x\n", ctx->db_id));
  767. return -1;
  768. }
  769. if (h->nesting != 0) {
  770. h->nesting--;
  771. h->nested_cancel = true;
  772. DEBUG(5, (__location__ " transaction cancel on db 0x%08x: nesting %d -> %d\n",
  773. ctx->db_id, ctx->transaction->nesting + 1, ctx->transaction->nesting));
  774. return 0;
  775. }
  776. DEBUG(5,(__location__ " Cancel transaction on db 0x%08x\n", ctx->db_id));
  777. ctx->transaction = NULL;
  778. talloc_free(h);
  779. return 0;
  780. }
  781. static NTSTATUS db_ctdb_store(struct db_record *rec, TDB_DATA data, int flag)
  782. {
  783. struct db_ctdb_rec *crec = talloc_get_type_abort(
  784. rec->private_data, struct db_ctdb_rec);
  785. return db_ctdb_ltdb_store(crec->ctdb_ctx, rec->key, &(crec->header), data);
  786. }
  #ifdef HAVE_CTDB_CONTROL_SCHEDULE_FOR_DELETION_DECL
  /*
   * Send the local SCHEDULE_FOR_DELETION control for a record to ctdbd.
   * The control carries the db id, the record's header and its key, and
   * is sent with CTDB_CTRL_FLAG_NOREPLY.
   *
   * Returns NT_STATUS_NO_MEMORY if the request buffer cannot be
   * allocated, the status of the control call if that failed, and
   * NT_STATUS_UNSUCCESSFUL if the call succeeded but reported a
   * non-zero control status.
   */
  static NTSTATUS db_ctdb_send_schedule_for_deletion(struct db_record *rec)
  {
      struct db_ctdb_rec *crec = talloc_get_type_abort(
          rec->private_data, struct db_ctdb_rec);
      struct ctdb_control_schedule_for_deletion *req;
      TDB_DATA payload;
      NTSTATUS status;
      int cstatus;

      payload.dsize = offsetof(struct ctdb_control_schedule_for_deletion, key) + rec->key.dsize;
      payload.dptr = talloc_zero_array(crec, uint8_t, payload.dsize);
      if (payload.dptr == NULL) {
          DEBUG(0, (__location__ " talloc failed!\n"));
          return NT_STATUS_NO_MEMORY;
      }

      /* fill in the fixed part, then append the key bytes */
      req = (struct ctdb_control_schedule_for_deletion *)(void *)payload.dptr;
      req->db_id = crec->ctdb_ctx->db_id;
      req->hdr = crec->header;
      req->keylen = rec->key.dsize;
      memcpy(req->key, rec->key.dptr, rec->key.dsize);

      status = ctdbd_control_local(messaging_ctdbd_connection(),
                                   CTDB_CONTROL_SCHEDULE_FOR_DELETION,
                                   crec->ctdb_ctx->db_id,
                                   CTDB_CTRL_FLAG_NOREPLY, /* flags */
                                   payload,
                                   NULL, /* outdata */
                                   NULL, /* errmsg */
                                   &cstatus);
      talloc_free(payload.dptr);

      if (NT_STATUS_IS_OK(status) && cstatus == 0) {
          return status;
      }

      DEBUG(1, (__location__ " Error sending local control "
                "SCHEDULE_FOR_DELETION: %s, cstatus = %d\n",
                nt_errstr(status), cstatus));
      if (NT_STATUS_IS_OK(status)) {
          status = NT_STATUS_UNSUCCESSFUL;
      }
      return status;
  }
  #endif
  827. static NTSTATUS db_ctdb_delete(struct db_record *rec)
  828. {
  829. TDB_DATA data;
  830. NTSTATUS status;
  831. /*
  832. * We have to store the header with empty data. TODO: Fix the
  833. * tdb-level cleanup
  834. */
  835. ZERO_STRUCT(data);
  836. status = db_ctdb_store(rec, data, 0);
  837. if (!NT_STATUS_IS_OK(status)) {
  838. return status;
  839. }
  840. #ifdef HAVE_CTDB_CONTROL_SCHEDULE_FOR_DELETION_DECL
  841. status = db_ctdb_send_schedule_for_deletion(rec);
  842. #endif
  843. return status;
  844. }
  845. static int db_ctdb_record_destr(struct db_record* data)
  846. {
  847. struct db_ctdb_rec *crec = talloc_get_type_abort(
  848. data->private_data, struct db_ctdb_rec);
  849. int threshold;
  850. DEBUG(10, (DEBUGLEVEL > 10
  851. ? "Unlocking db %u key %s\n"
  852. : "Unlocking db %u key %.20s\n",
  853. (int)crec->ctdb_ctx->db_id,
  854. hex_encode_talloc(data, (unsigned char *)data->key.dptr,
  855. data->key.dsize)));
  856. tdb_chainunlock(crec->ctdb_ctx->wtdb->tdb, data->key);
  857. threshold = lp_ctdb_locktime_warn_threshold();
  858. if (threshold != 0) {
  859. double timediff = timeval_elapsed(&crec->lock_time);
  860. if ((timediff * 1000) > threshold) {
  861. DEBUG(0, ("Held tdb lock %f seconds\n", timediff));
  862. }
  863. }
  864. return 0;
  865. }
  866. static struct db_record *fetch_locked_internal(struct db_ctdb_ctx *ctx,
  867. TALLOC_CTX *mem_ctx,
  868. TDB_DATA key)
  869. {
  870. struct db_record *result;
  871. struct db_ctdb_rec *crec;
  872. NTSTATUS status;
  873. TDB_DATA ctdb_data;
  874. int migrate_attempts = 0;
  875. if (!(result = talloc(mem_ctx, struct db_record))) {
  876. DEBUG(0, ("talloc failed\n"));
  877. return NULL;
  878. }
  879. if (!(crec = talloc_zero(result, struct db_ctdb_rec))) {
  880. DEBUG(0, ("talloc failed\n"));
  881. TALLOC_FREE(result);
  882. return NULL;
  883. }
  884. result->private_data = (void *)crec;
  885. crec->ctdb_ctx = ctx;
  886. result->key.dsize = key.dsize;
  887. result->key.dptr = (uint8 *)talloc_memdup(result, key.dptr, key.dsize);
  888. if (result->key.dptr == NULL) {
  889. DEBUG(0, ("talloc failed\n"));
  890. TALLOC_FREE(result);
  891. return NULL;
  892. }
  893. /*
  894. * Do a blocking lock on the record
  895. */
  896. again:
  897. if (DEBUGLEVEL >= 10) {
  898. char *keystr = hex_encode_talloc(result, key.dptr, key.dsize);
  899. DEBUG(10, (DEBUGLEVEL > 10
  900. ? "Locking db %u key %s\n"
  901. : "Locking db %u key %.20s\n",
  902. (int)crec->ctdb_ctx->db_id, keystr));
  903. TALLOC_FREE(keystr);
  904. }
  905. if (tdb_chainlock(ctx->wtdb->tdb, key) != 0) {
  906. DEBUG(3, ("tdb_chainlock failed\n"));
  907. TALLOC_FREE(result);
  908. return NULL;
  909. }
  910. result->store = db_ctdb_store;
  911. result->delete_rec = db_ctdb_delete;
  912. talloc_set_destructor(result, db_ctdb_record_destr);
  913. ctdb_data = tdb_fetch_compat(ctx->wtdb->tdb, key);
  914. /*
  915. * See if we have a valid record and we are the dmaster. If so, we can
  916. * take the shortcut and just return it.
  917. */
  918. if ((ctdb_data.dptr == NULL) ||
  919. (ctdb_data.dsize < sizeof(struct ctdb_ltdb_header)) ||
  920. ((struct ctdb_ltdb_header *)ctdb_data.dptr)->dmaster != get_my_vnn()
  921. #if 0
  922. || (random() % 2 != 0)
  923. #endif
  924. ) {
  925. SAFE_FREE(ctdb_data.dptr);
  926. tdb_chainunlock(ctx->wtdb->tdb, key);
  927. talloc_set_destructor(result, NULL);
  928. migrate_attempts += 1;
  929. DEBUG(10, ("ctdb_data.dptr = %p, dmaster = %u (%u)\n",
  930. ctdb_data.dptr, ctdb_data.dptr ?
  931. ((struct ctdb_ltdb_header *)ctdb_data.dptr)->dmaster : -1,
  932. get_my_vnn()));
  933. status = ctdbd_migrate(messaging_ctdbd_connection(), ctx->db_id,
  934. key);
  935. if (!NT_STATUS_IS_OK(status)) {
  936. DEBUG(5, ("ctdb_migrate failed: %s\n",
  937. nt_errstr(status)));
  938. TALLOC_FREE(result);
  939. return NULL;
  940. }
  941. /* now its migrated, try again */
  942. goto again;
  943. }
  944. if (migrate_attempts > 10) {
  945. DEBUG(0, ("db_ctdb_fetch_locked needed %d attempts\n",
  946. migrate_attempts));
  947. }
  948. GetTimeOfDay(&crec->lock_time);
  949. memcpy(&crec->header, ctdb_data.dptr, sizeof(crec->header));
  950. result->value.dsize = ctdb_data.dsize - sizeof(crec->header);
  951. result->value.dptr = NULL;
  952. if ((result->value.dsize != 0)
  953. && !(result->value.dptr = (uint8 *)talloc_memdup(
  954. result, ctdb_data.dptr + sizeof(crec->header),
  955. result->value.dsize))) {
  956. DEBUG(0, ("talloc failed\n"));
  957. TALLOC_FREE(result);
  958. }
  959. SAFE_FREE(ctdb_data.dptr);
  960. return result;
  961. }
  962. static struct db_record *db_ctdb_fetch_locked(struct db_context *db,
  963. TALLOC_CTX *mem_ctx,
  964. TDB_DATA key)
  965. {
  966. struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data,
  967. struct db_ctdb_ctx);
  968. if (ctx->transaction != NULL) {
  969. return db_ctdb_fetch_locked_transaction(ctx, mem_ctx, key);
  970. }
  971. if (db->persistent) {
  972. return db_ctdb_fetch_locked_persistent(ctx, mem_ctx, key);
  973. }
  974. return fetch_locked_internal(ctx, mem_ctx, key);
  975. }
  976. /*
  977. fetch (unlocked, no migration) operation on ctdb
  978. */
  979. static int db_ctdb_fetch(struct db_context *db, TALLOC_CTX *mem_ctx,
  980. TDB_DATA key, TDB_DATA *data)
  981. {
  982. struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data,
  983. struct db_ctdb_ctx);
  984. NTSTATUS status;
  985. TDB_DATA ctdb_data;
  986. if (ctx->transaction) {
  987. return db_ctdb_transaction_fetch(ctx, mem_ctx, key, data);
  988. }
  989. if (db->persistent) {
  990. return db_ctdb_fetch_persistent(ctx, mem_ctx, key, data);
  991. }
  992. /* try a direct fetch */
  993. ctdb_data = tdb_fetch_compat(ctx->wtdb->tdb, key);
  994. /*
  995. * See if we have a valid record and we are the dmaster. If so, we can
  996. * take the shortcut and just return it.
  997. * we bypass the dmaster check for persistent databases
  998. */
  999. if ((ctdb_data.dptr != NULL) &&
  1000. (ctdb_data.dsize >= sizeof(struct ctdb_ltdb_header)) &&
  1001. ((struct ctdb_ltdb_header *)ctdb_data.dptr)->dmaster == get_my_vnn())
  1002. {
  1003. /* we are the dmaster - avoid the ctdb protocol op */
  1004. data->dsize = ctdb_data.dsize - sizeof(struct ctdb_ltdb_header);
  1005. if (data->dsize == 0) {
  1006. SAFE_FREE(ctdb_data.dptr);
  1007. data->dptr = NULL;
  1008. return 0;
  1009. }
  1010. data->dptr = (uint8 *)talloc_memdup(
  1011. mem_ctx, ctdb_data.dptr+sizeof(struct ctdb_ltdb_header),
  1012. data->dsize);
  1013. SAFE_FREE(ctdb_data.dptr);
  1014. if (data->dptr == NULL) {
  1015. return -1;
  1016. }
  1017. return 0;
  1018. }
  1019. SAFE_FREE(ctdb_data.dptr);
  1020. /* we weren't able to get it locally - ask ctdb to fetch it for us */
  1021. status = ctdbd_fetch(messaging_ctdbd_connection(), ctx->db_id, key,
  1022. mem_ctx, data);
  1023. if (!NT_STATUS_IS_OK(status)) {
  1024. DEBUG(5, ("ctdbd_fetch failed: %s\n", nt_errstr(status)));
  1025. return -1;
  1026. }
  1027. return 0;
  1028. }
  1029. struct traverse_state {
  1030. struct db_context *db;
  1031. int (*fn)(struct db_record *rec, void *private_data);
  1032. void *private_data;
  1033. };
  1034. static void traverse_callback(TDB_DATA key, TDB_DATA data, void *private_data)
  1035. {
  1036. struct traverse_state *state = (struct traverse_state *)private_data;
  1037. struct db_record *rec;
  1038. TALLOC_CTX *tmp_ctx = talloc_new(state->db);
  1039. /* we have to give them a locked record to prevent races */
  1040. rec = db_ctdb_fetch_locked(state->db, tmp_ctx, key);
  1041. if (rec && rec->value.dsize > 0) {
  1042. state->fn(rec, state->private_data);
  1043. }
  1044. talloc_free(tmp_ctx);
  1045. }
  1046. static int traverse_persistent_callback(TDB_CONTEXT *tdb, TDB_DATA kbuf, TDB_DATA dbuf,
  1047. void *private_data)
  1048. {
  1049. struct traverse_state *state = (struct traverse_state *)private_data;
  1050. struct db_record *rec;
  1051. TALLOC_CTX *tmp_ctx = talloc_new(state->db);
  1052. int ret = 0;
  1053. /*
  1054. * Skip the __db_sequence_number__ key:
  1055. * This is used for persistent transactions internally.
  1056. */
  1057. if (kbuf.dsize == strlen(CTDB_DB_SEQNUM_KEY) + 1 &&
  1058. strcmp((const char*)kbuf.dptr, CTDB_DB_SEQNUM_KEY))
  1059. {
  1060. goto done;
  1061. }
  1062. /* we have to give them a locked record to prevent races */
  1063. rec = db_ctdb_fetch_locked(state->db, tmp_ctx, kbuf);
  1064. if (rec && rec->value.dsize > 0) {
  1065. ret = state->fn(rec, state->private_data);
  1066. }
  1067. done:
  1068. talloc_free(tmp_ctx);
  1069. return ret;
  1070. }
  1071. static int db_ctdb_traverse(struct db_context *db,
  1072. int (*fn)(struct db_record *rec,
  1073. void *private_data),
  1074. void *private_data)
  1075. {
  1076. struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data,
  1077. struct db_ctdb_ctx);
  1078. struct traverse_state state;
  1079. state.db = db;
  1080. state.fn = fn;
  1081. state.private_data = private_data;
  1082. if (db->persistent) {
  1083. /* for persistent databases we don't need to do a ctdb traverse,
  1084. we can do a faster local traverse */
  1085. return tdb_traverse(ctx->wtdb->tdb, traverse_persistent_callback, &state);
  1086. }
  1087. ctdbd_traverse(ctx->db_id, traverse_callback, &state);
  1088. return 0;
  1089. }
  1090. static NTSTATUS db_ctdb_store_deny(struct db_record *rec, TDB_DATA data, int flag)
  1091. {
  1092. return NT_STATUS_MEDIA_WRITE_PROTECTED;
  1093. }
  1094. static NTSTATUS db_ctdb_delete_deny(struct db_record *rec)
  1095. {
  1096. return NT_STATUS_MEDIA_WRITE_PROTECTED;
  1097. }
  1098. static void traverse_read_callback(TDB_DATA key, TDB_DATA data, void *private_data)
  1099. {
  1100. struct traverse_state *state = (struct traverse_state *)private_data;
  1101. struct db_record rec;
  1102. rec.key = key;
  1103. rec.value = data;
  1104. rec.store = db_ctdb_store_deny;
  1105. rec.delete_rec = db_ctdb_delete_deny;
  1106. rec.private_data = state->db;
  1107. state->fn(&rec, state->private_data);
  1108. }
  1109. static int traverse_persistent_callback_read(TDB_CONTEXT *tdb, TDB_DATA kbuf, TDB_DATA dbuf,
  1110. void *private_data)
  1111. {
  1112. struct traverse_state *state = (struct traverse_state *)private_data;
  1113. struct db_record rec;
  1114. /*
  1115. * Skip the __db_sequence_number__ key:
  1116. * This is used for persistent transactions internally.
  1117. */
  1118. if (kbuf.dsize == strlen(CTDB_DB_SEQNUM_KEY) + 1 &&
  1119. strncmp((const char*)kbuf.dptr, CTDB_DB_SEQNUM_KEY,
  1120. strlen(CTDB_DB_SEQNUM_KEY)) == 0)
  1121. {
  1122. return 0;
  1123. }
  1124. rec.key = kbuf;
  1125. rec.value = dbuf;
  1126. rec.store = db_ctdb_store_deny;
  1127. rec.delete_rec = db_ctdb_delete_deny;
  1128. rec.private_data = state->db;
  1129. if (rec.value.dsize <= sizeof(struct ctdb_ltdb_header)) {
  1130. /* a deleted record */
  1131. return 0;
  1132. }
  1133. rec.value.dsize -= sizeof(struct ctdb_ltdb_header);
  1134. rec.value.dptr += sizeof(struct ctdb_ltdb_header);
  1135. return state->fn(&rec, state->private_data);
  1136. }
  1137. static int db_ctdb_traverse_read(struct db_context *db,
  1138. int (*fn)(struct db_record *rec,
  1139. void *private_data),
  1140. void *private_data)
  1141. {
  1142. struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data,
  1143. struct db_ctdb_ctx);
  1144. struct traverse_state state;
  1145. state.db = db;
  1146. state.fn = fn;
  1147. state.private_data = private_data;
  1148. if (db->persistent) {
  1149. /* for persistent databases we don't need to do a ctdb traverse,
  1150. we can do a faster local traverse */
  1151. return tdb_traverse_read(ctx->wtdb->tdb, traverse_persistent_callback_read, &state);
  1152. }
  1153. ctdbd_traverse(ctx->db_id, traverse_read_callback, &state);
  1154. return 0;
  1155. }
  1156. static int db_ctdb_get_seqnum(struct db_context *db)
  1157. {
  1158. struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data,
  1159. struct db_ctdb_ctx);
  1160. return tdb_get_seqnum(ctx->wtdb->tdb);
  1161. }
  1162. static int db_ctdb_get_flags(struct db_context *db)
  1163. {
  1164. struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data,
  1165. struct db_ctdb_ctx);
  1166. return tdb_get_flags(ctx->wtdb->tdb);
  1167. }
  1168. struct db_context *db_open_ctdb(TALLOC_CTX *mem_ctx,
  1169. const char *name,
  1170. int hash_size, int tdb_flags,
  1171. int open_flags, mode_t mode)
  1172. {
  1173. struct db_context *result;
  1174. struct db_ctdb_ctx *db_ctdb;
  1175. char *db_path;
  1176. struct ctdbd_connection *conn;
  1177. if (!lp_clustering()) {
  1178. DEBUG(10, ("Clustering disabled -- no ctdb\n"));
  1179. return NULL;
  1180. }
  1181. if (!(result = talloc_zero(mem_ctx, struct db_context))) {
  1182. DEBUG(0, ("talloc failed\n"));
  1183. TALLOC_FREE(result);
  1184. return NULL;
  1185. }
  1186. if (!(db_ctdb = talloc(result, struct db_ctdb_ctx))) {
  1187. DEBUG(0, ("talloc failed\n"));
  1188. TALLOC_FREE(result);
  1189. return NULL;
  1190. }
  1191. db_ctdb->transaction = NULL;
  1192. db_ctdb->db = result;
  1193. conn = messaging_ctdbd_connection();
  1194. if (conn == NULL) {
  1195. DEBUG(1, ("Could not connect to ctdb\n"));
  1196. TALLOC_FREE(result);
  1197. return NULL;
  1198. }
  1199. if (!NT_STATUS_IS_OK(ctdbd_db_attach(conn, name, &db_ctdb->db_id, tdb_flags))) {
  1200. DEBUG(0, ("ctdbd_db_attach failed for %s\n", name));
  1201. TALLOC_FREE(result);
  1202. return NULL;
  1203. }
  1204. db_path = ctdbd_dbpath(conn, db_ctdb, db_ctdb->db_id);
  1205. result->persistent = ((tdb_flags & TDB_CLEAR_IF_FIRST) == 0);
  1206. /* only pass through specific flags */
  1207. tdb_flags &= TDB_SEQNUM;
  1208. /* honor permissions if user has specified O_CREAT */
  1209. if (open_flags & O_CREAT) {
  1210. chmod(db_path, mode);
  1211. }
  1212. db_ctdb->wtdb = tdb_wrap_open(db_ctdb, db_path, hash_size, tdb_flags, O_RDWR, 0);
  1213. if (db_ctdb->wtdb == NULL) {
  1214. DEBUG(0, ("Could not open tdb %s: %s\n", db_path, strerror(errno)));
  1215. TALLOC_FREE(result);
  1216. return NULL;
  1217. }
  1218. talloc_free(db_path);
  1219. if (result->persistent) {
  1220. db_ctdb->lock_ctx = g_lock_ctx_init(db_ctdb,
  1221. ctdb_conn_msg_ctx(conn));
  1222. if (db_ctdb->lock_ctx == NULL) {
  1223. DEBUG(0, ("g_lock_ctx_init failed\n"));
  1224. TALLOC_FREE(result);
  1225. return NULL;
  1226. }
  1227. }
  1228. result->private_data = (void *)db_ctdb;
  1229. result->fetch_locked = db_ctdb_fetch_locked;
  1230. result->fetch = db_ctdb_fetch;
  1231. result->traverse = db_ctdb_traverse;
  1232. result->traverse_read = db_ctdb_traverse_read;
  1233. result->get_seqnum = db_ctdb_get_seqnum;
  1234. result->get_flags = db_ctdb_get_flags;
  1235. result->transaction_start = db_ctdb_transaction_start;
  1236. result->transaction_commit = db_ctdb_transaction_commit;
  1237. result->transaction_cancel = db_ctdb_transaction_cancel;
  1238. DEBUG(3,("db_open_ctdb: opened database '%s' with dbid 0x%x\n",
  1239. name, db_ctdb->db_id));
  1240. return result;
  1241. }
  1242. #endif