/zk.go

https://bitbucket.org/chairmanstudios/gozk · Go · 1127 lines · 697 code · 135 blank · 295 comment · 192 complexity · 93f83bd0ed815a5312ab7274010a77b8 MD5 · raw file

  1. // gozk - ZooKeeper support for the Go language
  2. //
  3. // https://wiki.ubuntu.com/gozk
  4. //
  5. // Copyright (c) 2010-2011 Canonical Ltd.
  6. //
  7. // Written by Gustavo Niemeyer <gustavo.niemeyer@canonical.com>
  8. //
  9. package zookeeper
  10. /*
  11. #cgo CFLAGS: -I/usr/local/include/c-client-src -I/usr/local/include/zookeeper
  12. #cgo LDFLAGS: -lzookeeper_mt
  13. #include <zookeeper.h>
  14. #include "helpers.h"
  15. */
  16. import "C"
  17. import (
  18. "fmt"
  19. "sync"
  20. "time"
  21. "unsafe"
  22. )
  23. // -----------------------------------------------------------------------
  24. // Main constants and data types.
// Conn represents a connection to a set of ZooKeeper nodes.
type Conn struct {
	// watchChannels holds the event channels of the watches created
	// through this connection, keyed by watch id.
	watchChannels map[uintptr]chan Event
	// sessionWatchId is the id of the watch created at dial time for
	// the session channel.
	sessionWatchId uintptr
	// handle is the C client handle. It is nil once Close has run.
	handle *C.zhandle_t
	// mutex protects handle: Close takes the write lock, every other
	// operation holds the read lock while using the handle.
	mutex sync.RWMutex
}
// ClientId represents an established ZooKeeper session. It can be
// passed into Redial to reestablish a connection to an existing session.
type ClientId struct {
	// cId is the session identifier in the C client's representation.
	cId C.clientid_t
}
// ACL represents one access control list element, providing the permissions
// (one of PERM_*), the scheme ("digest", etc), and the id (scheme-dependent)
// for the access control mechanism in ZooKeeper.
type ACL struct {
	Perms  uint32 // Permission bits (PERM_* constants).
	Scheme string // Access control scheme, e.g. "auth", "world" or "digest".
	Id     string // Scheme-dependent identity, e.g. "anyone" for "world".
}
// Event channels are used to provide notifications of changes in the
// ZooKeeper connection state and in specific node aspects.
//
// There are two sources of events: the session channel obtained when
// connecting with Dial or Redial, and any watch channels obtained
// through one of the W-suffixed functions (GetW, ExistsW, etc).
//
// The session channel will only receive session-level events notifying
// about critical and transient changes in the ZooKeeper connection
// state (STATE_CONNECTED, STATE_EXPIRED_SESSION, etc). On long
// running applications the session channel must *necessarily* be
// observed since certain events like session expirations require an
// explicit reconnection and reestablishment of state (or bailing out).
// Because of that, the buffer used on the session channel has a limited
// size, and a panic will occur if too many events are not collected.
//
// Watch channels enable monitoring state for nodes, and the
// moment they're fired depends on which function was called to
// create them. Note that, unlike in other ZooKeeper interfaces,
// gozk will NOT dispatch unimportant session events such as
// STATE_ASSOCIATING, STATE_CONNECTING and STATE_CONNECTED to
// watch Event channels, since they are transient and disruptive
// to the workflow. Critical state changes such as expirations
// are still delivered to all event channels, though, and the
// transient events may be observed in the session channel.
//
// Since every watch channel may receive critical session events, events
// received must not be handled blindly as if the watch requested has
// been fired. To facilitate such tests, Events offer the Ok method,
// and they also have a String method describing the state and the
// change being reported. E.g.:
//
//	event := <-watch
//	if !event.Ok() {
//		return fmt.Errorf("%v", event)
//	}
//
// NOTE(review): the original example assigned the event directly to an
// error variable, which relied on the pre-Go 1 os.Error interface; confirm
// whether Event also implements Error before restoring that form.
//
// Note that closed channels will deliver zeroed Event, which means
// event.Type is set to EVENT_CLOSED and event.State is set to STATE_CLOSED,
// to facilitate handling.
type Event struct {
	Type  int    // One of the EVENT_* constants.
	Path  string // For non-session events, the path of the watched node.
	State int    // One of the STATE_* constants.
}
// Error represents a ZooKeeper error.
type Error struct {
	// Op names the operation that failed (e.g. "get", "create").
	Op string
	// Code is the ZooKeeper status code reported for the operation.
	Code ErrorCode
	// SystemError holds an error if Code is ZSYSTEMERROR.
	SystemError error
	// Path is the node path the operation was applied to, or "" when
	// the operation is not tied to a path.
	Path string
}
  99. func (e *Error) Error() string {
  100. s := e.Code.String()
  101. if e.Code == ZSYSTEMERROR && e.SystemError != nil {
  102. s = e.SystemError.Error()
  103. }
  104. if e.Path == "" {
  105. return fmt.Sprintf("zookeeper: %s: %v", e.Op, s)
  106. }
  107. return fmt.Sprintf("zookeeper: %s %q: %v", e.Op, e.Path, s)
  108. }
  109. // IsError returns whether the error is a *Error
  110. // with the given error code.
  111. func IsError(err error, code ErrorCode) bool {
  112. if err, _ := err.(*Error); err != nil {
  113. return err.Code == code
  114. }
  115. return false
  116. }
// ErrorCode represents a kind of ZooKeeper error.
type ErrorCode int

// Error codes, each one taking the value of the identically named
// constant exposed by the ZooKeeper C client library.
const (
	ZOK                      ErrorCode = C.ZOK
	ZSYSTEMERROR             ErrorCode = C.ZSYSTEMERROR
	ZRUNTIMEINCONSISTENCY    ErrorCode = C.ZRUNTIMEINCONSISTENCY
	ZDATAINCONSISTENCY       ErrorCode = C.ZDATAINCONSISTENCY
	ZCONNECTIONLOSS          ErrorCode = C.ZCONNECTIONLOSS
	ZMARSHALLINGERROR        ErrorCode = C.ZMARSHALLINGERROR
	ZUNIMPLEMENTED           ErrorCode = C.ZUNIMPLEMENTED
	ZOPERATIONTIMEOUT        ErrorCode = C.ZOPERATIONTIMEOUT
	ZBADARGUMENTS            ErrorCode = C.ZBADARGUMENTS
	ZINVALIDSTATE            ErrorCode = C.ZINVALIDSTATE
	ZAPIERROR                ErrorCode = C.ZAPIERROR
	ZNONODE                  ErrorCode = C.ZNONODE
	ZNOAUTH                  ErrorCode = C.ZNOAUTH
	ZBADVERSION              ErrorCode = C.ZBADVERSION
	ZNOCHILDRENFOREPHEMERALS ErrorCode = C.ZNOCHILDRENFOREPHEMERALS
	ZNODEEXISTS              ErrorCode = C.ZNODEEXISTS
	ZNOTEMPTY                ErrorCode = C.ZNOTEMPTY
	ZSESSIONEXPIRED          ErrorCode = C.ZSESSIONEXPIRED
	ZINVALIDCALLBACK         ErrorCode = C.ZINVALIDCALLBACK
	ZINVALIDACL              ErrorCode = C.ZINVALIDACL
	ZAUTHFAILED              ErrorCode = C.ZAUTHFAILED
	ZCLOSING                 ErrorCode = C.ZCLOSING
	ZNOTHING                 ErrorCode = C.ZNOTHING
	ZSESSIONMOVED            ErrorCode = C.ZSESSIONMOVED
)
  145. func (code ErrorCode) String() string {
  146. return C.GoString(C.zerror(C.int(code))) // Static, no need to free it.
  147. }
  148. // zkError creates an appropriate error return from
  149. // a ZooKeeper status and the errno return from a C API
  150. // call.
  151. func zkError(rc C.int, cerr error, op, path string) error {
  152. code := ErrorCode(rc)
  153. if code == ZOK {
  154. return nil
  155. }
  156. err := &Error{
  157. Op: op,
  158. Code: code,
  159. Path: path,
  160. }
  161. if code == ZSYSTEMERROR {
  162. err.SystemError = cerr
  163. }
  164. return err
  165. }
  166. func closingError(op, path string) error {
  167. return zkError(C.int(ZCLOSING), nil, op, path)
  168. }
// Constants for SetLogLevel. Each one takes the value of the matching
// ZOO_LOG_LEVEL_* constant of the C client library.
const (
	LOG_ERROR = C.ZOO_LOG_LEVEL_ERROR
	LOG_WARN  = C.ZOO_LOG_LEVEL_WARN
	LOG_INFO  = C.ZOO_LOG_LEVEL_INFO
	LOG_DEBUG = C.ZOO_LOG_LEVEL_DEBUG
)
  176. // These are defined as extern. To avoid having to declare them as
  177. // variables here they are inlined, and correctness is ensured on
  178. // init().
  179. // Constants for Create's flags parameter.
  180. const (
  181. EPHEMERAL = 1 << iota
  182. SEQUENCE
  183. )
  184. // Constants for ACL Perms.
  185. const (
  186. PERM_READ = 1 << iota
  187. PERM_WRITE
  188. PERM_CREATE
  189. PERM_DELETE
  190. PERM_ADMIN
  191. PERM_ALL = 0x1f
  192. )
  193. // Constants for Event Type.
  194. const (
  195. EVENT_CREATED = iota + 1
  196. EVENT_DELETED
  197. EVENT_CHANGED
  198. EVENT_CHILD
  199. EVENT_SESSION = -1
  200. EVENT_NOTWATCHING = -2
  201. // Doesn't really exist in zk, but handy for use in zeroed Event
  202. // values (e.g. closed channels).
  203. EVENT_CLOSED = 0
  204. )
  205. // Constants for Event State.
  206. const (
  207. STATE_EXPIRED_SESSION = -112
  208. STATE_AUTH_FAILED = -113
  209. STATE_CONNECTING = 1
  210. STATE_ASSOCIATING = 2
  211. STATE_CONNECTED = 3
  212. // Doesn't really exist in zk, but handy for use in zeroed Event
  213. // values (e.g. closed channels).
  214. STATE_CLOSED = 0
  215. )
// init verifies that the Go constants declared in this file carry the same
// values as their counterparts in the C client library, panicking on any
// mismatch, and then sets the initial log level of the C library.
func init() {
	if EPHEMERAL != C.ZOO_EPHEMERAL ||
		SEQUENCE != C.ZOO_SEQUENCE ||
		PERM_READ != C.ZOO_PERM_READ ||
		PERM_WRITE != C.ZOO_PERM_WRITE ||
		PERM_CREATE != C.ZOO_PERM_CREATE ||
		PERM_DELETE != C.ZOO_PERM_DELETE ||
		PERM_ADMIN != C.ZOO_PERM_ADMIN ||
		PERM_ALL != C.ZOO_PERM_ALL ||
		EVENT_CREATED != C.ZOO_CREATED_EVENT ||
		EVENT_DELETED != C.ZOO_DELETED_EVENT ||
		EVENT_CHANGED != C.ZOO_CHANGED_EVENT ||
		EVENT_CHILD != C.ZOO_CHILD_EVENT ||
		EVENT_SESSION != C.ZOO_SESSION_EVENT ||
		EVENT_NOTWATCHING != C.ZOO_NOTWATCHING_EVENT ||
		STATE_EXPIRED_SESSION != C.ZOO_EXPIRED_SESSION_STATE ||
		STATE_AUTH_FAILED != C.ZOO_AUTH_FAILED_STATE ||
		STATE_CONNECTING != C.ZOO_CONNECTING_STATE ||
		STATE_ASSOCIATING != C.ZOO_ASSOCIATING_STATE ||
		STATE_CONNECTED != C.ZOO_CONNECTED_STATE {
		panic("OOPS: Constants don't match C counterparts")
	}
	// NOTE(review): 0 is not one of the LOG_* constants; presumably it
	// silences the C library's logging by default — confirm.
	SetLogLevel(0)
}
  240. // AuthACL produces an ACL list containing a single ACL which uses
  241. // the provided permissions, with the scheme "auth", and ID "", which
  242. // is used by ZooKeeper to represent any authenticated user.
  243. func AuthACL(perms uint32) []ACL {
  244. return []ACL{{perms, "auth", ""}}
  245. }
  246. // WorldACL produces an ACL list containing a single ACL which uses
  247. // the provided permissions, with the scheme "world", and ID "anyone",
  248. // which is used by ZooKeeper to represent any user at all.
  249. func WorldACL(perms uint32) []ACL {
  250. return []ACL{{perms, "world", "anyone"}}
  251. }
  252. // -----------------------------------------------------------------------
  253. // Event methods.
  254. // Ok returns true in case the event reports zk as being in a usable state.
  255. func (e Event) Ok() bool {
  256. // That's really it for now. Anything else seems to mean zk
  257. // can't be used at the moment.
  258. return e.State == STATE_CONNECTED
  259. }
  260. func (e Event) String() (s string) {
  261. switch e.State {
  262. case STATE_EXPIRED_SESSION:
  263. s = "ZooKeeper session expired"
  264. case STATE_AUTH_FAILED:
  265. s = "ZooKeeper authentication failed"
  266. case STATE_CONNECTING:
  267. s = "ZooKeeper connecting"
  268. case STATE_ASSOCIATING:
  269. s = "ZooKeeper still associating"
  270. case STATE_CONNECTED:
  271. s = "ZooKeeper connected"
  272. case STATE_CLOSED:
  273. s = "ZooKeeper connection closed"
  274. default:
  275. s = fmt.Sprintf("unknown ZooKeeper state %d", e.State)
  276. }
  277. if e.Type == -1 || e.Type == EVENT_SESSION {
  278. return
  279. }
  280. if s != "" {
  281. s += "; "
  282. }
  283. switch e.Type {
  284. case EVENT_CREATED:
  285. s += "path created: "
  286. case EVENT_DELETED:
  287. s += "path deleted: "
  288. case EVENT_CHANGED:
  289. s += "path changed: "
  290. case EVENT_CHILD:
  291. s += "path children changed: "
  292. case EVENT_NOTWATCHING:
  293. s += "not watching: " // !?
  294. case EVENT_SESSION:
  295. // nothing
  296. }
  297. s += e.Path
  298. return
  299. }
  300. // -----------------------------------------------------------------------
// Stat contains detailed information about a node.
type Stat struct {
	// c is the stat structure in the C client's representation; the
	// accessor methods convert its fields to Go types.
	c C.struct_Stat
}
  305. // Czxid returns the zxid of the change that caused the node to be created.
  306. func (stat *Stat) Czxid() int64 {
  307. return int64(stat.c.czxid)
  308. }
  309. // Mzxid returns the zxid of the change that last modified the node.
  310. func (stat *Stat) Mzxid() int64 {
  311. return int64(stat.c.mzxid)
  312. }
  313. func millisec2time(ms int64) time.Time {
  314. return time.Unix(ms/1e3, ms%1e3*1e6)
  315. }
  316. // CTime returns the time (at millisecond resolution) when the node was
  317. // created.
  318. func (stat *Stat) CTime() time.Time {
  319. return millisec2time(int64(stat.c.ctime))
  320. }
  321. // MTime returns the time (at millisecond resolution) when the node was
  322. // last modified.
  323. func (stat *Stat) MTime() time.Time {
  324. return millisec2time(int64(stat.c.mtime))
  325. }
  326. // Version returns the number of changes to the data of the node.
  327. func (stat *Stat) Version() int {
  328. return int(stat.c.version)
  329. }
  330. // CVersion returns the number of changes to the children of the node.
  331. // This only changes when children are created or removed.
  332. func (stat *Stat) CVersion() int {
  333. return int(stat.c.cversion)
  334. }
  335. // AVersion returns the number of changes to the ACL of the node.
  336. func (stat *Stat) AVersion() int {
  337. return int(stat.c.aversion)
  338. }
  339. // If the node is an ephemeral node, EphemeralOwner returns the session id
  340. // of the owner of the node; otherwise it will return zero.
  341. func (stat *Stat) EphemeralOwner() int64 {
  342. return int64(stat.c.ephemeralOwner)
  343. }
  344. // DataLength returns the length of the data in the node in bytes.
  345. func (stat *Stat) DataLength() int {
  346. return int(stat.c.dataLength)
  347. }
  348. // NumChildren returns the number of children of the node.
  349. func (stat *Stat) NumChildren() int {
  350. return int(stat.c.numChildren)
  351. }
  352. // Pzxid returns the Pzxid of the node, whatever that is.
  353. func (stat *Stat) Pzxid() int64 {
  354. return int64(stat.c.pzxid)
  355. }
  356. // -----------------------------------------------------------------------
  357. // Functions and methods related to ZooKeeper itself.
// bufferSize is the size in bytes (1 MiB) of the C buffer that Get and GetW
// allocate to receive the data of a node.
const bufferSize = 1024 * 1024
  359. // SetLogLevel changes the minimum level of logging output generated
  360. // to adjust the amount of information provided.
  361. func SetLogLevel(level int) {
  362. C.zoo_set_debug_level(C.ZooLogLevel(level))
  363. }
  364. // Dial initializes the communication with a ZooKeeper cluster. The provided
  365. // servers parameter may include multiple server addresses, separated
  366. // by commas, so that the client will automatically attempt to connect
  367. // to another server if one of them stops working for whatever reason.
  368. //
  369. // The recvTimeout parameter, given in nanoseconds, allows controlling
  370. // the amount of time the connection can stay unresponsive before the
  371. // server will be considered problematic.
  372. //
  373. // Session establishment is asynchronous, meaning that this function
  374. // will return before the communication with ZooKeeper is fully established.
  375. // The watch channel receives events of type SESSION_EVENT when any change
  376. // to the state of the established connection happens. See the documentation
  377. // for the Event type for more details.
  378. func Dial(servers string, recvTimeout time.Duration) (*Conn, <-chan Event, error) {
  379. return dial(servers, recvTimeout, nil)
  380. }
  381. // Redial is equivalent to Dial, but attempts to reestablish an existing session
  382. // identified via the clientId parameter.
  383. func Redial(servers string, recvTimeout time.Duration, clientId *ClientId) (*Conn, <-chan Event, error) {
  384. return dial(servers, recvTimeout, clientId)
  385. }
  386. func dial(servers string, recvTimeout time.Duration, clientId *ClientId) (*Conn, <-chan Event, error) {
  387. conn := &Conn{}
  388. conn.watchChannels = make(map[uintptr]chan Event)
  389. var cId *C.clientid_t
  390. if clientId != nil {
  391. cId = &clientId.cId
  392. }
  393. watchId, watchChannel := conn.createWatch(true)
  394. conn.sessionWatchId = watchId
  395. cservers := C.CString(servers)
  396. handle, cerr := C.zookeeper_init(cservers, C.watch_handler, C.int(recvTimeout/1e6), cId, unsafe.Pointer(watchId), 0)
  397. C.free(unsafe.Pointer(cservers))
  398. if handle == nil {
  399. conn.closeAllWatches()
  400. return nil, nil, zkError(C.int(ZSYSTEMERROR), cerr, "dial", "")
  401. }
  402. conn.handle = handle
  403. runWatchLoop()
  404. return conn, watchChannel, nil
  405. }
  406. // ClientId returns the client ID for the existing session with ZooKeeper.
  407. // This is useful to reestablish an existing session via ReInit.
  408. func (conn *Conn) ClientId() *ClientId {
  409. conn.mutex.RLock()
  410. defer conn.mutex.RUnlock()
  411. return &ClientId{*C.zoo_client_id(conn.handle)}
  412. }
// Close terminates the ZooKeeper interaction. It closes the C client
// handle, closes all watches registered on the connection and stops the
// watch loop. Calling Close on an already closed connection returns a
// ZCLOSING error.
func (conn *Conn) Close() error {
	// Protect from concurrency around conn.handle change.
	conn.mutex.Lock()
	defer conn.mutex.Unlock()
	if conn.handle == nil {
		// ZooKeeper may hang indefinitely if a handler is closed twice,
		// so we get in the way and prevent it from happening.
		return closingError("close", "")
	}
	rc, cerr := C.zookeeper_close(conn.handle)
	conn.closeAllWatches()
	stopWatchLoop()
	// At this point, nothing else should need conn.handle.
	// Clearing it makes every later operation fail with ZCLOSING.
	conn.handle = nil
	return zkError(rc, cerr, "close", "")
}
  430. // Get returns the data and status from an existing node. err will be nil,
  431. // unless an error is found. Attempting to retrieve data from a non-existing
  432. // node is an error.
  433. func (conn *Conn) Get(path string) (data string, stat *Stat, err error) {
  434. conn.mutex.RLock()
  435. defer conn.mutex.RUnlock()
  436. if conn.handle == nil {
  437. return "", nil, closingError("get", path)
  438. }
  439. cpath := C.CString(path)
  440. cbuffer := (*C.char)(C.malloc(bufferSize))
  441. cbufferLen := C.int(bufferSize)
  442. defer C.free(unsafe.Pointer(cpath))
  443. defer C.free(unsafe.Pointer(cbuffer))
  444. var cstat Stat
  445. rc, cerr := C.zoo_wget(conn.handle, cpath, nil, nil, cbuffer, &cbufferLen, &cstat.c)
  446. if rc != C.ZOK {
  447. return "", nil, zkError(rc, cerr, "get", path)
  448. }
  449. result := C.GoStringN(cbuffer, cbufferLen)
  450. return result, &cstat, nil
  451. }
  452. // GetW works like Get but also returns a channel that will receive
  453. // a single Event value when the data or existence of the given ZooKeeper
  454. // node changes or when critical session events happen. See the
  455. // documentation of the Event type for more details.
  456. func (conn *Conn) GetW(path string) (data string, stat *Stat, watch <-chan Event, err error) {
  457. conn.mutex.RLock()
  458. defer conn.mutex.RUnlock()
  459. if conn.handle == nil {
  460. return "", nil, nil, closingError("getw", path)
  461. }
  462. cpath := C.CString(path)
  463. cbuffer := (*C.char)(C.malloc(bufferSize))
  464. cbufferLen := C.int(bufferSize)
  465. defer C.free(unsafe.Pointer(cpath))
  466. defer C.free(unsafe.Pointer(cbuffer))
  467. watchId, watchChannel := conn.createWatch(true)
  468. var cstat Stat
  469. rc, cerr := C.zoo_wget(conn.handle, cpath, C.watch_handler, unsafe.Pointer(watchId), cbuffer, &cbufferLen, &cstat.c)
  470. if rc != C.ZOK {
  471. conn.forgetWatch(watchId)
  472. return "", nil, nil, zkError(rc, cerr, "getw", path)
  473. }
  474. result := C.GoStringN(cbuffer, cbufferLen)
  475. return result, &cstat, watchChannel, nil
  476. }
  477. // Children returns the children list and status from an existing node.
  478. // Attempting to retrieve the children list from a non-existent node is an error.
  479. func (conn *Conn) Children(path string) (children []string, stat *Stat, err error) {
  480. conn.mutex.RLock()
  481. defer conn.mutex.RUnlock()
  482. if conn.handle == nil {
  483. return nil, nil, closingError("children", path)
  484. }
  485. cpath := C.CString(path)
  486. defer C.free(unsafe.Pointer(cpath))
  487. cvector := C.struct_String_vector{}
  488. var cstat Stat
  489. rc, cerr := C.zoo_wget_children2(conn.handle, cpath, nil, nil, &cvector, &cstat.c)
  490. // Can't happen if rc != 0, but avoid potential memory leaks in the future.
  491. if cvector.count != 0 {
  492. children = parseStringVector(&cvector)
  493. }
  494. if rc == C.ZOK {
  495. stat = &cstat
  496. } else {
  497. err = zkError(rc, cerr, "children", path)
  498. }
  499. return
  500. }
  501. // ChildrenW works like Children but also returns a channel that will
  502. // receive a single Event value when a node is added or removed under the
  503. // provided path or when critical session events happen. See the documentation
  504. // of the Event type for more details.
  505. func (conn *Conn) ChildrenW(path string) (children []string, stat *Stat, watch <-chan Event, err error) {
  506. conn.mutex.RLock()
  507. defer conn.mutex.RUnlock()
  508. if conn.handle == nil {
  509. return nil, nil, nil, closingError("childrenw", path)
  510. }
  511. cpath := C.CString(path)
  512. defer C.free(unsafe.Pointer(cpath))
  513. watchId, watchChannel := conn.createWatch(true)
  514. cvector := C.struct_String_vector{}
  515. var cstat Stat
  516. rc, cerr := C.zoo_wget_children2(conn.handle, cpath, C.watch_handler, unsafe.Pointer(watchId), &cvector, &cstat.c)
  517. // Can't happen if rc != 0, but avoid potential memory leaks in the future.
  518. if cvector.count != 0 {
  519. children = parseStringVector(&cvector)
  520. }
  521. if rc == C.ZOK {
  522. stat = &cstat
  523. watch = watchChannel
  524. } else {
  525. conn.forgetWatch(watchId)
  526. err = zkError(rc, cerr, "childrenw", path)
  527. }
  528. return
  529. }
  530. func parseStringVector(cvector *C.struct_String_vector) []string {
  531. vector := make([]string, cvector.count)
  532. dataStart := uintptr(unsafe.Pointer(cvector.data))
  533. uintptrSize := unsafe.Sizeof(dataStart)
  534. for i := 0; i != len(vector); i++ {
  535. cpathPos := dataStart + uintptr(i)*uintptrSize
  536. cpath := *(**C.char)(unsafe.Pointer(cpathPos))
  537. vector[i] = C.GoString(cpath)
  538. }
  539. C.deallocate_String_vector(cvector)
  540. return vector
  541. }
  542. // Exists checks if a node exists at the given path. If it does,
  543. // stat will contain meta information on the existing node, otherwise
  544. // it will be nil.
  545. func (conn *Conn) Exists(path string) (stat *Stat, err error) {
  546. conn.mutex.RLock()
  547. defer conn.mutex.RUnlock()
  548. if conn.handle == nil {
  549. return nil, closingError("exists", path)
  550. }
  551. cpath := C.CString(path)
  552. defer C.free(unsafe.Pointer(cpath))
  553. var cstat Stat
  554. rc, cerr := C.zoo_wexists(conn.handle, cpath, nil, nil, &cstat.c)
  555. // We diverge a bit from the usual here: a ZNONODE is not an error
  556. // for an exists call, otherwise every Exists call would have to check
  557. // for err != nil and err.Code() != ZNONODE.
  558. if rc == C.ZOK {
  559. stat = &cstat
  560. } else if rc != C.ZNONODE {
  561. err = zkError(rc, cerr, "exists", path)
  562. }
  563. return
  564. }
  565. // ExistsW works like Exists but also returns a channel that will
  566. // receive an Event value when a node is created in case the returned
  567. // stat is nil and the node didn't exist, or when the existing node
  568. // is removed. It will also receive critical session events. See the
  569. // documentation of the Event type for more details.
  570. func (conn *Conn) ExistsW(path string) (stat *Stat, watch <-chan Event, err error) {
  571. conn.mutex.RLock()
  572. defer conn.mutex.RUnlock()
  573. if conn.handle == nil {
  574. return nil, nil, closingError("existsw", path)
  575. }
  576. cpath := C.CString(path)
  577. defer C.free(unsafe.Pointer(cpath))
  578. watchId, watchChannel := conn.createWatch(true)
  579. var cstat Stat
  580. rc, cerr := C.zoo_wexists(conn.handle, cpath, C.watch_handler, unsafe.Pointer(watchId), &cstat.c)
  581. // We diverge a bit from the usual here: a ZNONODE is not an error
  582. // for an exists call, otherwise every Exists call would have to check
  583. // for err != nil and err.Code() != ZNONODE.
  584. switch ErrorCode(rc) {
  585. case ZOK:
  586. stat = &cstat
  587. watch = watchChannel
  588. case ZNONODE:
  589. watch = watchChannel
  590. default:
  591. conn.forgetWatch(watchId)
  592. err = zkError(rc, cerr, "existsw", path)
  593. }
  594. return
  595. }
  596. // Create creates a node at the given path with the given data. The
  597. // provided flags may determine features such as whether the node is
  598. // ephemeral or not, or whether it should have a sequence number
  599. // attached to it, and the provided ACLs will determine who can access
  600. // the node and under which circumstances.
  601. //
  602. // The returned path is useful in cases where the created path may differ
  603. // from the requested one, such as when a sequence number is appended
  604. // to it due to the use of the gozk.SEQUENCE flag.
  605. func (conn *Conn) Create(path, value string, flags int, aclv []ACL) (pathCreated string, err error) {
  606. conn.mutex.RLock()
  607. defer conn.mutex.RUnlock()
  608. if conn.handle == nil {
  609. return "", closingError("close", path)
  610. }
  611. cpath := C.CString(path)
  612. cvalue := C.CString(value)
  613. defer C.free(unsafe.Pointer(cpath))
  614. defer C.free(unsafe.Pointer(cvalue))
  615. caclv := buildACLVector(aclv)
  616. defer C.deallocate_ACL_vector(caclv)
  617. // Allocate additional space for the sequence (10 bytes should be enough).
  618. cpathLen := C.size_t(len(path) + 32)
  619. cpathCreated := (*C.char)(C.malloc(cpathLen))
  620. defer C.free(unsafe.Pointer(cpathCreated))
  621. rc, cerr := C.zoo_create(conn.handle, cpath, cvalue, C.int(len(value)), caclv, C.int(flags), cpathCreated, C.int(cpathLen))
  622. if rc == C.ZOK {
  623. pathCreated = C.GoString(cpathCreated)
  624. } else {
  625. err = zkError(rc, cerr, "create", path)
  626. }
  627. return
  628. }
  629. // Set modifies the data for the existing node at the given path, replacing it
  630. // by the provided value. If version is not -1, the operation will only
  631. // succeed if the node is still at the given version when the replacement
  632. // happens as an atomic operation. The returned Stat value will contain
  633. // data for the resulting node, after the operation is performed.
  634. //
  635. // It is an error to attempt to set the data of a non-existing node with
  636. // this function. In these cases, use Create instead.
  637. func (conn *Conn) Set(path, value string, version int) (stat *Stat, err error) {
  638. conn.mutex.RLock()
  639. defer conn.mutex.RUnlock()
  640. if conn.handle == nil {
  641. return nil, closingError("set", path)
  642. }
  643. cpath := C.CString(path)
  644. cvalue := C.CString(value)
  645. defer C.free(unsafe.Pointer(cpath))
  646. defer C.free(unsafe.Pointer(cvalue))
  647. var cstat Stat
  648. rc, cerr := C.zoo_set2(conn.handle, cpath, cvalue, C.int(len(value)), C.int(version), &cstat.c)
  649. if rc == C.ZOK {
  650. stat = &cstat
  651. } else {
  652. err = zkError(rc, cerr, "set", path)
  653. }
  654. return
  655. }
  656. // Delete removes the node at path. If version is not -1, the operation
  657. // will only succeed if the node is still at this version when the
  658. // node is deleted as an atomic operation.
  659. func (conn *Conn) Delete(path string, version int) (err error) {
  660. conn.mutex.RLock()
  661. defer conn.mutex.RUnlock()
  662. if conn.handle == nil {
  663. return closingError("delete", path)
  664. }
  665. cpath := C.CString(path)
  666. defer C.free(unsafe.Pointer(cpath))
  667. rc, cerr := C.zoo_delete(conn.handle, cpath, C.int(version))
  668. return zkError(rc, cerr, "delete", path)
  669. }
// AddAuth adds a new authentication certificate to the ZooKeeper
// interaction. The scheme parameter will specify how to handle the
// authentication information, while the cert parameter provides the
// identity data itself. For instance, the "digest" scheme requires
// a pair like "username:password" to be provided as the certificate.
//
// The call blocks until the C client reports the completion of the
// request, and panics if the completion data cannot be allocated.
func (conn *Conn) AddAuth(scheme, cert string) error {
	conn.mutex.RLock()
	defer conn.mutex.RUnlock()
	if conn.handle == nil {
		return closingError("addauth", "")
	}
	cscheme := C.CString(scheme)
	ccert := C.CString(cert)
	defer C.free(unsafe.Pointer(cscheme))
	defer C.free(unsafe.Pointer(ccert))
	// Completion data used to wait for the asynchronous zoo_add_auth call
	// (presumably declared in helpers.h — confirm).
	data := C.create_completion_data()
	if data == nil {
		panic("Failed to create completion data")
	}
	defer C.destroy_completion_data(data)
	rc, cerr := C.zoo_add_auth(conn.handle, cscheme, ccert, C.int(len(cert)), C.handle_void_completion, unsafe.Pointer(data))
	if rc != C.ZOK {
		// The request could not even be submitted.
		return zkError(rc, cerr, "addauth", "")
	}
	C.wait_for_completion(data)
	// The result code of the completed request is read back from the
	// pointer-sized data field (presumably stored there by
	// handle_void_completion — confirm).
	rc = C.int(uintptr(data.data))
	return zkError(rc, nil, "addauth", "")
}
  698. // ACL returns the access control list for path.
  699. func (conn *Conn) ACL(path string) ([]ACL, *Stat, error) {
  700. conn.mutex.RLock()
  701. defer conn.mutex.RUnlock()
  702. if conn.handle == nil {
  703. return nil, nil, closingError("acl", path)
  704. }
  705. cpath := C.CString(path)
  706. defer C.free(unsafe.Pointer(cpath))
  707. caclv := C.struct_ACL_vector{}
  708. var cstat Stat
  709. rc, cerr := C.zoo_get_acl(conn.handle, cpath, &caclv, &cstat.c)
  710. if rc != C.ZOK {
  711. return nil, nil, zkError(rc, cerr, "acl", path)
  712. }
  713. aclv := parseACLVector(&caclv)
  714. return aclv, &cstat, nil
  715. }
  716. // SetACL changes the access control list for path.
  717. func (conn *Conn) SetACL(path string, aclv []ACL, version int) error {
  718. conn.mutex.RLock()
  719. defer conn.mutex.RUnlock()
  720. if conn.handle == nil {
  721. return closingError("setacl", path)
  722. }
  723. cpath := C.CString(path)
  724. defer C.free(unsafe.Pointer(cpath))
  725. caclv := buildACLVector(aclv)
  726. defer C.deallocate_ACL_vector(caclv)
  727. rc, cerr := C.zoo_set_acl(conn.handle, cpath, C.int(version), caclv)
  728. return zkError(rc, cerr, "setacl", path)
  729. }
  730. func parseACLVector(caclv *C.struct_ACL_vector) []ACL {
  731. structACLSize := unsafe.Sizeof(C.struct_ACL{})
  732. aclv := make([]ACL, caclv.count)
  733. dataStart := uintptr(unsafe.Pointer(caclv.data))
  734. for i := 0; i != int(caclv.count); i++ {
  735. caclPos := dataStart + uintptr(i)*structACLSize
  736. cacl := (*C.struct_ACL)(unsafe.Pointer(caclPos))
  737. acl := &aclv[i]
  738. acl.Perms = uint32(cacl.perms)
  739. acl.Scheme = C.GoString(cacl.id.scheme)
  740. acl.Id = C.GoString(cacl.id.id)
  741. }
  742. C.deallocate_ACL_vector(caclv)
  743. return aclv
  744. }
  745. func buildACLVector(aclv []ACL) *C.struct_ACL_vector {
  746. structACLSize := unsafe.Sizeof(C.struct_ACL{})
  747. data := C.calloc(C.size_t(len(aclv)), C.size_t(structACLSize))
  748. if data == nil {
  749. panic("ACL data allocation failed")
  750. }
  751. caclv := &C.struct_ACL_vector{}
  752. caclv.data = (*C.struct_ACL)(data)
  753. caclv.count = C.int32_t(len(aclv))
  754. dataStart := uintptr(unsafe.Pointer(caclv.data))
  755. for i, acl := range aclv {
  756. caclPos := dataStart + uintptr(i)*structACLSize
  757. cacl := (*C.struct_ACL)(unsafe.Pointer(caclPos))
  758. cacl.perms = C.int32_t(acl.Perms)
  759. // C.deallocate_ACL_vector() will also handle deallocation of these.
  760. cacl.id.scheme = C.CString(acl.Scheme)
  761. cacl.id.id = C.CString(acl.Id)
  762. }
  763. return caclv
  764. }
  765. // -----------------------------------------------------------------------
  766. // RetryChange utility method.
// ChangeFunc is the callback used by RetryChange to compute the new value
// of a node. It receives the current value and stat of the node (an empty
// string and a nil stat when the node doesn't exist yet) and returns the
// value the node should hold, or an error to abort the change.
type ChangeFunc func(oldValue string, oldStat *Stat) (newValue string, err error)
  768. // RetryChange runs changeFunc to attempt to atomically change path
  769. // in a lock free manner, and retries in case there was another
  770. // concurrent change between reading and writing the node.
  771. //
  772. // changeFunc must work correctly if called multiple times in case
  773. // the modification fails due to concurrent changes, and it may return
  774. // an error that will cause the the RetryChange function to stop and
  775. // return the same error.
  776. //
  777. // This mechanism is not suitable for a node that is frequently modified
  778. // concurrently. For those cases, consider using a pessimistic locking
  779. // mechanism.
  780. //
  781. // This is the detailed operation flow for RetryChange:
  782. //
  783. // 1. Attempt to read the node. In case the node exists, but reading it
  784. // fails, stop and return the error found.
  785. //
  786. // 2. Call the changeFunc with the current node value and stat,
  787. // or with an empty string and nil stat, if the node doesn't yet exist.
  788. // If the changeFunc returns an error, stop and return the same error.
  789. //
  790. // 3. If the changeFunc returns no errors, use the string returned as
  791. // the new candidate value for the node, and attempt to either create
  792. // the node, if it didn't exist, or to change its contents at the specified
  793. // version. If this procedure fails due to conflicts (concurrent changes
  794. // in the same node), repeat from step 1. If this procedure fails with any
  795. // other error, stop and return the error found.
  796. //
  797. func (conn *Conn) RetryChange(path string, flags int, acl []ACL, changeFunc ChangeFunc) error {
  798. for {
  799. oldValue, oldStat, err := conn.Get(path)
  800. if err != nil && !IsError(err, ZNONODE) {
  801. return err
  802. }
  803. newValue, err := changeFunc(oldValue, oldStat)
  804. if err != nil {
  805. return err
  806. }
  807. if oldStat == nil {
  808. _, err := conn.Create(path, newValue, flags, acl)
  809. if err == nil || !IsError(err, ZNODEEXISTS) {
  810. return err
  811. }
  812. continue
  813. }
  814. if newValue == oldValue {
  815. return nil // Nothing to do.
  816. }
  817. _, err = conn.Set(path, newValue, oldStat.Version())
  818. if err == nil || !IsError(err, ZBADVERSION) && !IsError(err, ZNONODE) {
  819. return err
  820. }
  821. }
  822. panic("not reached")
  823. }
  824. // -----------------------------------------------------------------------
  825. // Watching mechanism.
  826. // The bridging of watches into Go is slightly tricky because Cgo doesn't
  827. // yet provide a nice way to callback from C into a Go routine, so we do
  828. // this by hand. That bridging works the following way:
  829. //
  830. // Whenever a *W method is called, it will return a channel which
  831. // outputs Event values. Internally, a map is used to maintain references
  832. // between a unique integer key (the watchId), and the event channel. The
  833. // watchId is then handed to the C ZooKeeper library as the watch context,
  834. // so that we get it back when events happen. Using an integer key as the
  835. // watch context rather than a pointer is needed because there's no guarantee
  836. // that in the future the GC will not move objects around, and also because
  837. // a strong reference is needed on the Go side so that the channel is not
  838. // garbage-collected.
  839. //
  840. // So, this is what's done to establish the watch. The interesting part
  841. // lies in the other side of this logic, when events actually happen.
  842. //
  843. // Since Cgo doesn't allow calling back into Go, we actually fire a new
  844. // goroutine the very first time Init is called, and allow it to block
  845. // in a pthread condition variable within a C function. This condition
  846. // will only be notified once a ZooKeeper watch callback appends new
  847. // entries to the event list. When this happens, the C function returns
  848. // and we get back into Go land with the pointer to the watch data,
  849. // including the watchId and other event details such as type and path.
  850. var watchMutex sync.Mutex
  851. var watchConns = make(map[uintptr]*Conn)
  852. var watchCounter uintptr
  853. var watchLoopCounter int
  854. // CountPendingWatches returns the number of pending watches which have
  855. // not been fired yet, across all ZooKeeper instances. This is useful
  856. // mostly as a debugging and testing aid.
  857. func CountPendingWatches() int {
  858. watchMutex.Lock()
  859. count := len(watchConns)
  860. watchMutex.Unlock()
  861. return count
  862. }
  863. // createWatch creates and registers a watch, returning the watch id
  864. // and channel.
  865. func (conn *Conn) createWatch(session bool) (watchId uintptr, watchChannel chan Event) {
  866. buf := 1 // session/watch event
  867. if session {
  868. buf = 32
  869. }
  870. watchChannel = make(chan Event, buf)
  871. watchMutex.Lock()
  872. defer watchMutex.Unlock()
  873. watchId = watchCounter
  874. watchCounter += 1
  875. conn.watchChannels[watchId] = watchChannel
  876. watchConns[watchId] = conn
  877. return
  878. }
  879. // forgetWatch cleans resources used by watchId and prevents it
  880. // from ever getting delivered. It shouldn't be used if there's any
  881. // chance the watch channel is still visible and not closed, since
  882. // it might mean a goroutine would be blocked forever.
  883. func (conn *Conn) forgetWatch(watchId uintptr) {
  884. watchMutex.Lock()
  885. defer watchMutex.Unlock()
  886. delete(conn.watchChannels, watchId)
  887. delete(watchConns, watchId)
  888. }
  889. // closeAllWatches closes all watch channels for conn.
  890. func (conn *Conn) closeAllWatches() {
  891. watchMutex.Lock()
  892. defer watchMutex.Unlock()
  893. for watchId, ch := range conn.watchChannels {
  894. close(ch)
  895. delete(conn.watchChannels, watchId)
  896. delete(watchConns, watchId)
  897. }
  898. }
  899. // sendEvent delivers the event to the watchId event channel. If the
  900. // event channel is a watch event channel, the event is delivered,
  901. // the channel is closed, and resources are freed.
  902. func sendEvent(watchId uintptr, event Event) {
  903. if event.State == STATE_CLOSED {
  904. panic("Attempted to send a CLOSED event")
  905. }
  906. watchMutex.Lock()
  907. defer watchMutex.Unlock()
  908. conn, ok := watchConns[watchId]
  909. if !ok {
  910. return
  911. }
  912. if event.Type == EVENT_SESSION && watchId != conn.sessionWatchId {
  913. // All session events on non-session watches will be delivered
  914. // and cause the watch to be closed early. We purposefully do
  915. // that to enforce a simpler model that takes hiccups as
  916. // important events that cause code to reestablish the state
  917. // from a pristine and well known good start.
  918. if event.State == STATE_CONNECTED {
  919. // That means the watch was established while we were still
  920. // connecting to zk, but we're somewhat strict about only
  921. // dealing with watches when in a well known good state.
  922. // Make the intent more clear by tweaking the code.
  923. event.State = STATE_CONNECTING
  924. }
  925. }
  926. ch := conn.watchChannels[watchId]
  927. if ch == nil {
  928. return
  929. }
  930. select {
  931. case ch <- event:
  932. default:
  933. // Channel not available for sending, which means session
  934. // events are necessarily involved (trivial events go
  935. // straight to the buffer), and the application isn't paying
  936. // attention for long enough to have the buffer filled up.
  937. // Break down now rather than leaking forever.
  938. if watchId == conn.sessionWatchId {
  939. panic("Session event channel buffer is full")
  940. } else {
  941. panic("Watch event channel buffer is full")
  942. }
  943. }
  944. if watchId != conn.sessionWatchId {
  945. delete(conn.watchChannels, watchId)
  946. delete(watchConns, watchId)
  947. close(ch)
  948. }
  949. }
  950. // runWatchLoop start the event loop to collect events from the C
  951. // library and dispatch them into Go land. Calling this function
  952. // multiple times will only increase a counter, rather than
  953. // getting multiple watch loops running.
  954. func runWatchLoop() {
  955. watchMutex.Lock()
  956. if watchLoopCounter == 0 {
  957. go _watchLoop()
  958. }
  959. watchLoopCounter += 1
  960. watchMutex.Unlock()
  961. }
  962. // stopWatchLoop decrements the event loop counter. For the moment,
  963. // the event loop doesn't actually stop, but some day we can easily
  964. // implement termination of the loop if necessary.
  965. func stopWatchLoop() {
  966. watchMutex.Lock()
  967. watchLoopCounter -= 1
  968. if watchLoopCounter == 0 {
  969. // Not really stopping right now, so let's just
  970. // avoid it from running again.
  971. watchLoopCounter += 1
  972. }
  973. watchMutex.Unlock()
  974. }
  975. // Loop and block in a C call waiting for a watch to be fired. When
  976. // it fires, handle the watch by dispatching it to the correct event
  977. // channel, and go back onto waiting mode.
  978. func _watchLoop() {
  979. for {
  980. // This will block until there's a watch event is available.
  981. data := C.wait_for_watch()
  982. event := Event{
  983. Type: int(data.event_type),
  984. Path: C.GoString(data.event_path),
  985. State: int(data.connection_state),
  986. }
  987. watchId := uintptr(data.watch_context)
  988. C.destroy_watch_data(data)
  989. sendEvent(watchId, event)
  990. }
  991. }