PageRenderTime 35ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/src/service.go

http://github.com/pgm/gospoke
Go | 532 lines | 403 code | 110 blank | 19 comment | 53 complexity | 278adb9c389846a208f29c62dac71fd0 MD5 | raw file
  1. package main
  2. import (
  3. "fmt"
  4. "bytes"
  5. "time"
  6. "sort"
  7. "regexp"
  8. )
  9. const (
  10. STATUS_UP = 1
  11. STATUS_DOWN = 2
  12. STATUS_UNKNOWN = 0
  13. )
  14. const (
  15. OKAY = 0
  16. DEBUG = 1
  17. INFO = 2
  18. WARN = 3
  19. ERROR = 4
  20. )
// Service is the hub's record for one monitored service: its configuration,
// heartbeat state, log and notification rules.
type Service struct {
	// Name is the key under which the service is stored in ServiceHub.services.
	Name string
	// Enabled must be true for any notification to be sent for this service.
	Enabled bool
	// Monitor is the heartbeat monitor created for the service by AddService.
	Monitor *HeartbeatMonitor
	// Status is one of STATUS_UP, STATUS_DOWN or STATUS_UNKNOWN.
	Status int
	// HeartbeatCount is incremented each time the heartbeat callback reports success.
	HeartbeatCount int
	// LastHeartbeatTimestamp is the timeline time of the last successful
	// heartbeat. It is divided by 1000 before being converted to a wall-clock
	// time, so it is presumably in milliseconds.
	LastHeartbeatTimestamp int64
	// Log holds the entries recorded for this service.
	Log ServiceLog
	Group string
	Description string
	// NotificationFilters maps a filter id to an expression; a log entry whose
	// Summary matches any of them is not notified.
	NotificationFilters map[int] *regexp.Regexp
	// NotificationFirstMinute and NotificationLastMinute bound (inclusively)
	// the local minute-of-day (hour*60+minute) of an entry's timestamp during
	// which notifications are allowed.
	NotificationFirstMinute int
	NotificationLastMinute int
}
// LogEntry is a single message recorded against a service.
//
// NOTE: ServiceHub.Log builds this struct with a positional literal, so the
// field order must not change.
type LogEntry struct {
	// ServiceName is the name of the service the entry was logged for.
	ServiceName string
	// Summary is the message text; notification filters are matched against it.
	Summary string
	// Severity is one of OKAY, DEBUG, INFO, WARN or ERROR.
	Severity int
	// Timestamp is divided by 1000 before conversion to wall-clock time, so it
	// is presumably in milliseconds.
	Timestamp int64
	// Sequence is the hub-wide id assigned by ServiceHub.nextSequenceId.
	Sequence int
}
// ServiceLog is the list of log entries recorded for one service, in the order
// they were appended.
type ServiceLog struct {
	entries []*LogEntry
}
// ServiceHub owns the set of monitored services. Its methods are not
// synchronized; ServiceHubAdapter funnels outside callers through
// timeline.Execute.
type ServiceHub struct {
	timeline *Timeline
	// services is keyed by service name.
	services map[string] *Service
	// notifier is not set by NewServiceHub; it has to be assigned separately.
	notifier *Notifier
	// logEntryCounter is the last id handed out by nextSequenceId. The same
	// counter numbers both log entries and notification filters.
	logEntryCounter int
}
// ServiceSnapshot is a copy of a service's state produced by
// ServiceHubAdapter.GetServices.
//
// NOTE: GetServices builds this struct with a positional literal, so the field
// order must not change.
type ServiceSnapshot struct {
	Name string
	// Status is the raw status constant; IsUp/IsDown/IsUnknown are the same
	// information pre-computed as booleans.
	Status int
	// LastHeartbeatTimestamp is formatted with time.Kitchen, or empty when the
	// service has never had a successful heartbeat.
	LastHeartbeatTimestamp string
	IsUp bool
	IsDown bool
	IsUnknown bool
	Enabled bool
	// Notifications holds the number of log entries per severity, ordered by
	// ascending severity.
	Notifications []NotificationSummary
	Description string
	Group string
	// FilterCount is the number of notification filters on the service.
	FilterCount int
}
// NotificationSummary is the number of log entries a service has at one
// severity level.
type NotificationSummary struct {
	Severity int
	Count int
}
  70. type ApiError struct {
  71. error string
  72. }
  73. func (e ApiError) String() string {
  74. return e.error
  75. }
// FilterSnapshot describes one notification filter: the id it is stored under
// and the source text of its regular expression.
type FilterSnapshot struct {
	Id int
	Expression string
}
// ThreadSafeServiceHub is the contract between the service hub and every
// thread running outside of the timeline thread. ServiceHubAdapter implements
// it by running each operation through Timeline.Execute.
type ThreadSafeServiceHub interface {
	// Log records a log entry for the named service.
	Log(serviceName string, summary string, severity int, timestamp int64) *ApiError
	// Heartbeat reports a heartbeat for the named service.
	Heartbeat(serviceName string) *ApiError
	// GetLogEntries returns the log entries of the named service.
	GetLogEntries(serviceName string) []*LogEntry
	// RemoveLogEntry deletes the log entry with the given sequence number.
	RemoveLogEntry(sequence int)
	// GetServices returns a snapshot of every registered service.
	GetServices() []ServiceSnapshot
	// GetNotificationFilters lists the notification filters of the named service.
	GetNotificationFilters(serviceName string) []*FilterSnapshot
	// SetServiceEnabled switches notifications for the named service on or off.
	SetServiceEnabled(serviceName string, enabled bool)
	// RemoveNotificationFilter deletes the filter with the given id.
	RemoveNotificationFilter(serviceName string, id int)
	// AddNotificationFilter adds a filter expression to the named service.
	AddNotificationFilter(serviceName string, expression *regexp.Regexp)
}
// ServiceHubAdapter wraps a ServiceHub and runs every operation through the
// hub's timeline (Timeline.Execute), blocking the caller until it completes.
type ServiceHubAdapter struct {
	hub *ServiceHub
}
  96. func (a *ServiceHubAdapter) AddNotificationFilter(serviceName string, expression *regexp.Regexp) {
  97. c := make(chan *ApiError)
  98. hub := a.hub
  99. hub.timeline.Execute(func() {
  100. hub.AddNotificationFilter(serviceName, expression)
  101. c<-nil
  102. })
  103. <-c
  104. }
  105. func (a *ServiceHubAdapter) RemoveNotificationFilter(serviceName string, id int) {
  106. c := make(chan *ApiError)
  107. hub := a.hub
  108. hub.timeline.Execute(func() {
  109. hub.RemoveNotificationFilter(serviceName, id)
  110. c<-nil
  111. })
  112. <-c
  113. }
  114. func (a *ServiceHubAdapter) SetServiceEnabled(serviceName string, enabled bool) {
  115. c := make(chan *ApiError)
  116. hub := a.hub
  117. hub.timeline.Execute(func() {
  118. c <- hub.SetServiceEnabled(serviceName, enabled)
  119. })
  120. <-c
  121. }
  122. func (a *ServiceHubAdapter) Log(serviceName string, summary string, severity int, timestamp int64) *ApiError {
  123. c := make(chan *ApiError)
  124. hub := a.hub
  125. hub.timeline.Execute(func() {
  126. c <- hub.Log(serviceName, summary, severity, timestamp)
  127. })
  128. return <-c
  129. }
  130. func (a *ServiceHubAdapter) Heartbeat (serviceName string) *ApiError {
  131. c := make(chan *ApiError)
  132. hub := a.hub
  133. hub.timeline.Execute(func() {
  134. service, found := hub.services[serviceName]
  135. if !found {
  136. c <- &ApiError{"No service named \""+serviceName+"\""}
  137. return
  138. }
  139. if service.Monitor != nil {
  140. service.Monitor.Heartbeat()
  141. }
  142. c <- nil
  143. })
  144. return <-c
  145. }
  146. func (a *ServiceHubAdapter) GetServices() []ServiceSnapshot {
  147. c := make(chan []ServiceSnapshot)
  148. hub := a.hub
  149. hub.timeline.Execute(func() {
  150. ss := make([]ServiceSnapshot, 0, len(hub.services))
  151. for _, v := range(hub.services) {
  152. notifications := make([]NotificationSummary, 0, 10)
  153. // count the number of message per severity
  154. counts := make(map[int] int)
  155. for _, l := range(v.Log.entries) {
  156. c, exists := counts[l.Severity]
  157. if !exists {
  158. c = 0
  159. }
  160. c += 1
  161. counts[l.Severity] = c
  162. }
  163. // now add them to the notification list ordered by severity
  164. keys := make([]int, 0, len(notifications))
  165. for k, _ := range(counts) {
  166. keys = append(keys, k)
  167. }
  168. sort.SortInts(keys)
  169. for _, k := range(keys) {
  170. notifications = append(notifications, NotificationSummary{k, counts[k]})
  171. }
  172. var timestamp string
  173. if v.HeartbeatCount == 0 {
  174. timestamp = ""
  175. } else {
  176. timestamp = time.SecondsToLocalTime(v.LastHeartbeatTimestamp/1000).Format(time.Kitchen)
  177. }
  178. ss = append(ss, ServiceSnapshot{v.Name,
  179. v.Status,
  180. timestamp,
  181. v.Status == STATUS_UP, v.Status == STATUS_DOWN, v.Status == STATUS_UNKNOWN,
  182. v.Enabled, notifications, v.Description, v.Group,
  183. len(v.NotificationFilters) })
  184. }
  185. c <- ss
  186. })
  187. return <-c
  188. }
  189. func (a *ServiceHubAdapter) GetLogEntries(serviceName string) []*LogEntry {
  190. c := make(chan []*LogEntry)
  191. hub := a.hub
  192. hub.timeline.Execute(func() {
  193. ss := make([]*LogEntry, 0, 100)
  194. service, found := hub.services[serviceName]
  195. if ! found {
  196. c <- ss
  197. return
  198. }
  199. for _, v := range(service.Log.entries) {
  200. ss = append(ss, v)
  201. }
  202. c <- ss
  203. })
  204. return <-c
  205. }
  206. func (a *ServiceHubAdapter) GetNotificationFilters(serviceName string) []*FilterSnapshot {
  207. c := make(chan []*FilterSnapshot)
  208. hub := a.hub
  209. hub.timeline.Execute(func() {
  210. fs := make([]*FilterSnapshot, 0, 100)
  211. service, found := hub.services[serviceName]
  212. if ! found {
  213. c <- fs
  214. return
  215. }
  216. for k, v := range(service.NotificationFilters) {
  217. fs = append(fs, &FilterSnapshot{k, v.String()})
  218. }
  219. c <- fs
  220. })
  221. return <-c
  222. }
  223. func removeLogEntriesWithId(entries []*LogEntry, sequenceToDel int) []*LogEntry {
  224. dest := 0
  225. for i, v := range(entries) {
  226. if v.Sequence == sequenceToDel {
  227. continue
  228. }
  229. entries[dest] = entries[i]
  230. dest++
  231. }
  232. return entries[:dest]
  233. }
  234. func (a *ServiceHubAdapter) RemoveLogEntry(sequence int) {
  235. c := make(chan bool)
  236. hub := a.hub
  237. hub.timeline.Execute(func() {
  238. for _, service := range(hub.services) {
  239. service.Log.entries = removeLogEntriesWithId(service.Log.entries, sequence)
  240. }
  241. c <- true
  242. })
  243. <-c
  244. }
  245. func NewHubAdapter(hub *ServiceHub) *ServiceHubAdapter {
  246. return &ServiceHubAdapter{hub}
  247. }
  248. ////////////////////////////////////////////////////////////////////////
  249. func NewServiceHub(timeline *Timeline) *ServiceHub {
  250. hub := &ServiceHub{timeline: timeline, services: make(map[string] *Service)}
  251. hub.logEntryCounter = 1
  252. return hub
  253. }
  254. func (h *ServiceHub) AddNotificationFilter(serviceName string, expression *regexp.Regexp) *ApiError{
  255. service, found := h.services[serviceName]
  256. if !found {
  257. return &ApiError{"No service named \""+serviceName+"\""}
  258. }
  259. id := h.nextSequenceId()
  260. service.NotificationFilters[id] = expression
  261. return nil
  262. }
  263. func (h *ServiceHub) RemoveNotificationFilter(serviceName string, id int) *ApiError{
  264. service, found := h.services[serviceName]
  265. if !found {
  266. return &ApiError{"No service named \""+serviceName+"\""}
  267. }
  268. service.NotificationFilters[id] = nil, false
  269. return nil
  270. }
  271. func (h *ServiceHub) SetServiceEnabled(serviceName string, enabled bool) *ApiError {
  272. service, found := h.services[serviceName]
  273. if !found {
  274. return &ApiError{"No service named \""+serviceName+"\""}
  275. }
  276. service.Enabled = enabled
  277. return nil
  278. }
  279. func (h *ServiceHub) Log(serviceName string, summary string, severity int, timestamp int64) *ApiError {
  280. service, found := h.services[serviceName]
  281. if !found {
  282. return &ApiError{"No service named \""+serviceName+"\""}
  283. }
  284. seq := h.nextSequenceId()
  285. service.Log.entries = append(service.Log.entries, &LogEntry{serviceName, summary, severity, timestamp, seq})
  286. h.notifier.CheckAndSendNotifications()
  287. return nil
  288. }
  289. func (h *ServiceHub) AddService(serviceName string, heartbeatTimeout int, group string, description string, enabled bool, nstart int, nstop int) {
  290. var s *Service
  291. s = &Service{Name: serviceName,
  292. Enabled: enabled,
  293. Status: STATUS_UNKNOWN,
  294. Description: description,
  295. Group: group,
  296. NotificationFilters: make(map[int]*regexp.Regexp),
  297. NotificationFirstMinute: nstart,
  298. NotificationLastMinute: nstop }
  299. heartbeatCallback := func(name string, isFailure bool) {
  300. if isFailure {
  301. h.Log(serviceName, "Heartbeat failure", WARN, h.timeline.Now())
  302. s.Status = STATUS_DOWN
  303. } else {
  304. s.Status = STATUS_UP
  305. s.HeartbeatCount += 1
  306. s.LastHeartbeatTimestamp = h.timeline.Now()
  307. }
  308. }
  309. s.Monitor = NewHeartbeatMonitor(h.timeline, serviceName, heartbeatTimeout, heartbeatCallback)
  310. h.services[serviceName] = s
  311. s.Monitor.Start()
  312. }
  313. func (h *ServiceHub) nextSequenceId() int {
  314. h.logEntryCounter += 1
  315. seq := h.logEntryCounter
  316. return seq
  317. }
  318. func (l *ServiceLog) FindAfter(sequence int) []*LogEntry {
  319. result := make([]*LogEntry, 0, len(l.entries))
  320. for _, v := range(l.entries) {
  321. if v.Sequence > sequence {
  322. result = append(result, v)
  323. }
  324. }
  325. return result
  326. }
  327. type ExecutorFn func (command string, input string)
  328. type Notifier struct {
  329. command string
  330. lastCheckSeq int
  331. lastSendTimestamp int64
  332. timeline *Timeline
  333. hub *ServiceHub
  334. executor ExecutorFn
  335. throttle int64
  336. }
  337. func NewNotifier(command string, throttle int, executor ExecutorFn, timeline *Timeline, hub *ServiceHub) *Notifier {
  338. return &Notifier{command: command, throttle: int64(throttle), timeline: timeline, hub: hub, executor: executor}
  339. }
  340. func (n *Notifier) CheckAndSendNotifications() {
  341. now := n.timeline.Now()
  342. if now - n.lastSendTimestamp >= n.throttle {
  343. // enough time has passed since the last send
  344. // so we can flush the event queue
  345. n.lastSendTimestamp = now
  346. n.sendNotificationSummary()
  347. } else {
  348. // too soon, so schedule a check of the queue after enough time has passed
  349. n.timeline.Schedule(n.lastSendTimestamp + n.throttle, func() { n.CheckAndSendNotifications() } )
  350. }
  351. }
  352. func isAllowingNotifications(service *Service, entry *LogEntry ) bool {
  353. summary := entry.Summary
  354. localTime := time.SecondsToLocalTime(entry.Timestamp/1000)
  355. minuteOfDay := localTime.Hour * 60 + localTime.Minute
  356. //log.Printf("minuteOfDay=%d first=%d last=%d\n", minuteOfDay, service.NotificationFirstMinute, service.NotificationLastMinute)
  357. if minuteOfDay < service.NotificationFirstMinute || minuteOfDay > service.NotificationLastMinute {
  358. return false
  359. }
  360. // check each filter
  361. for _, filter := range(service.NotificationFilters) {
  362. if filter.FindStringIndex(summary) != nil {
  363. return false
  364. }
  365. }
  366. return service.Enabled && entry.Severity >= WARN
  367. }
  368. func (n *Notifier) sendNotificationSummary() {
  369. // find all outstanding notifications, grouping them by service
  370. msgsByService := make(map[string] []string)
  371. maxSeq := 0
  372. for k, v := range(n.hub.services) {
  373. e := v.Log.FindAfter(n.lastCheckSeq)
  374. if len(e) > 0 {
  375. msgs := make([]string, 0, len(e))
  376. for _, l := range(e) {
  377. if l.Sequence > maxSeq {
  378. maxSeq = l.Sequence
  379. }
  380. // wait until the last moment to test v.Enabled so that maxSeq gets updated
  381. if isAllowingNotifications(v, l) {
  382. msgs = append(msgs, fmt.Sprintf("%s: %s", k, l.Summary))
  383. }
  384. }
  385. if len(msgs) > 0 {
  386. msgsByService[k] = msgs
  387. }
  388. }
  389. }
  390. // remember where we left off so we can identify what are new notifications
  391. if n.lastCheckSeq < maxSeq {
  392. n.lastCheckSeq = maxSeq
  393. }
  394. if len(msgsByService) > 1 {
  395. msg := bytes.NewBufferString("Multiple services had notifications: ")
  396. for k, v := range(msgsByService) {
  397. msg.WriteString(fmt.Sprintf("%s(%d) ", k, len(v)))
  398. }
  399. n.sendNotification(msg.String())
  400. } else if len(msgsByService) == 1 {
  401. // get the only msg list
  402. var serviceName string
  403. var msgs []string
  404. for tservice, tmsg := range(msgsByService) {
  405. serviceName = tservice
  406. msgs = tmsg
  407. }
  408. if len(msgs) > 1 {
  409. // if we have multiple messages, just send the count of messages and
  410. msg := fmt.Sprintf("%s had %d notifications", serviceName, len(msgs))
  411. n.sendNotification(msg)
  412. } else {
  413. // we must only have one message so just send that
  414. msg := msgs[0]
  415. n.sendNotification(msg)
  416. }
  417. }
  418. // otherwise if there were no messages pending, so do nothing
  419. }
  420. func (n *Notifier) sendNotification( msg string ) {
  421. n.executor(n.command, msg)
  422. }