PageRenderTime 3090ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/sandbox/plugins/sandbox_decoder.go

https://gitlab.com/karouf/heka
Go | 383 lines | 326 code | 33 blank | 24 comment | 105 complexity | 32dbbdb7b72fb1f4f649427b117df659 MD5 | raw file
  1. /***** BEGIN LICENSE BLOCK *****
  2. # This Source Code Form is subject to the terms of the Mozilla Public
  3. # License, v. 2.0. If a copy of the MPL was not distributed with this file,
  4. # You can obtain one at http://mozilla.org/MPL/2.0/.
  5. #
  6. # The Initial Developer of the Original Code is the Mozilla Foundation.
  7. # Portions created by the Initial Developer are Copyright (C) 2013-2015
  8. # the Initial Developer. All Rights Reserved.
  9. #
  10. # Contributor(s):
  11. # Mike Trinkala (trink@mozilla.com)
  12. # Rob Miller (rmiller@mozilla.com)
  13. #
  14. # ***** END LICENSE BLOCK *****/
  15. package plugins
  16. import (
  17. "fmt"
  18. "math/rand"
  19. "os"
  20. "path/filepath"
  21. "regexp"
  22. "sync"
  23. "sync/atomic"
  24. "time"
  25. "github.com/gogo/protobuf/proto"
  26. "github.com/mozilla-services/heka/message"
  27. "github.com/mozilla-services/heka/pipeline"
  28. . "github.com/mozilla-services/heka/sandbox"
  29. "github.com/mozilla-services/heka/sandbox/lua"
  30. "github.com/pborman/uuid"
  31. )
  32. // Decoder for converting structured/unstructured data into Heka messages.
  33. type SandboxDecoder struct {
  34. processMessageCount int64
  35. processMessageFailures int64
  36. processMessageSamples int64
  37. processMessageDuration int64
  38. sb Sandbox
  39. sbc *SandboxConfig
  40. preservationFile string
  41. reportLock sync.Mutex
  42. sample bool
  43. pack *pipeline.PipelinePack
  44. packs []*pipeline.PipelinePack
  45. dRunner pipeline.DecoderRunner
  46. name string
  47. tz *time.Location
  48. sampleDenominator int
  49. pConfig *pipeline.PipelineConfig
  50. }
  51. func (s *SandboxDecoder) ConfigStruct() interface{} {
  52. return NewSandboxConfig(s.pConfig.Globals)
  53. }
  54. func (s *SandboxDecoder) SetName(name string) {
  55. re := regexp.MustCompile("\\W")
  56. s.name = re.ReplaceAllString(name, "_")
  57. }
  58. // Heka will call this before calling any other methods to give us access to
  59. // the pipeline configuration.
  60. func (s *SandboxDecoder) SetPipelineConfig(pConfig *pipeline.PipelineConfig) {
  61. s.pConfig = pConfig
  62. }
  63. func (s *SandboxDecoder) Init(config interface{}) (err error) {
  64. s.sbc = config.(*SandboxConfig)
  65. globals := s.pConfig.Globals
  66. s.sbc.ScriptFilename = globals.PrependShareDir(s.sbc.ScriptFilename)
  67. s.sbc.PluginType = "decoder"
  68. s.sampleDenominator = globals.SampleDenominator
  69. s.tz = time.UTC
  70. if tz, ok := s.sbc.Config["tz"]; ok {
  71. s.tz, err = time.LoadLocation(tz.(string))
  72. if err != nil {
  73. return
  74. }
  75. }
  76. data_dir := globals.PrependBaseDir(DATA_DIR)
  77. if !fileExists(data_dir) {
  78. err = os.MkdirAll(data_dir, 0700)
  79. if err != nil {
  80. return
  81. }
  82. }
  83. switch s.sbc.ScriptType {
  84. case "lua":
  85. default:
  86. return fmt.Errorf("unsupported script type: %s", s.sbc.ScriptType)
  87. }
  88. s.sample = true
  89. return
  90. }
  91. func copyMessageHeaders(dst *message.Message, src *message.Message) {
  92. if src == nil || dst == nil || src == dst {
  93. return
  94. }
  95. if src.Timestamp != nil {
  96. dst.SetTimestamp(*src.Timestamp)
  97. } else {
  98. dst.Timestamp = nil
  99. }
  100. if src.Type != nil {
  101. dst.SetType(*src.Type)
  102. } else {
  103. dst.Type = nil
  104. }
  105. if src.Logger != nil {
  106. dst.SetLogger(*src.Logger)
  107. } else {
  108. dst.Logger = nil
  109. }
  110. if src.Severity != nil {
  111. dst.SetSeverity(*src.Severity)
  112. } else {
  113. dst.Severity = nil
  114. }
  115. if src.Pid != nil {
  116. dst.SetPid(*src.Pid)
  117. } else {
  118. dst.Pid = nil
  119. }
  120. if src.Hostname != nil {
  121. dst.SetHostname(*src.Hostname)
  122. } else {
  123. dst.Hostname = nil
  124. }
  125. }
  126. func (s *SandboxDecoder) SetDecoderRunner(dr pipeline.DecoderRunner) {
  127. if s.sb != nil {
  128. return // no-op already initialized
  129. }
  130. s.dRunner = dr
  131. var original *message.Message
  132. var err error
  133. switch s.sbc.ScriptType {
  134. case "lua":
  135. s.sb, err = lua.CreateLuaSandbox(s.sbc)
  136. default:
  137. err = fmt.Errorf("unsupported script type: %s", s.sbc.ScriptType)
  138. }
  139. if err == nil {
  140. s.preservationFile = filepath.Join(s.pConfig.Globals.PrependBaseDir(DATA_DIR),
  141. dr.Name()+DATA_EXT)
  142. if s.sbc.PreserveData && fileExists(s.preservationFile) {
  143. err = s.sb.Init(s.preservationFile)
  144. } else {
  145. err = s.sb.Init("")
  146. }
  147. }
  148. if err != nil {
  149. dr.LogError(err)
  150. if s.sb != nil {
  151. s.sb.Destroy("")
  152. s.sb = nil
  153. }
  154. s.pConfig.Globals.ShutDown(1)
  155. return
  156. }
  157. s.sb.InjectMessage(func(payload, payload_type, payload_name string) int {
  158. if s.pack == nil {
  159. s.pack = dr.NewPack()
  160. if s.pack == nil {
  161. return 5 // We're aborting, exit out.
  162. }
  163. if original == nil && len(s.packs) > 0 {
  164. original = s.packs[0].Message // payload injections have the original header data in the first pack
  165. }
  166. } else {
  167. original = nil // processing a new message, clear the old message
  168. }
  169. if len(payload_type) == 0 { // heka protobuf message
  170. // write protobuf encoding to MsgBytes
  171. needed := len(payload)
  172. if cap(s.pack.MsgBytes) < needed {
  173. s.pack.MsgBytes = make([]byte, len(payload))
  174. } else {
  175. s.pack.MsgBytes = s.pack.MsgBytes[:len(payload)]
  176. }
  177. copy(s.pack.MsgBytes, payload)
  178. s.pack.TrustMsgBytes = true
  179. if original == nil {
  180. original = new(message.Message)
  181. copyMessageHeaders(original, s.pack.Message) // save off the header values since unmarshal will wipe them out
  182. }
  183. if nil != proto.Unmarshal(s.pack.MsgBytes, s.pack.Message) {
  184. return 1
  185. }
  186. if s.tz != time.UTC {
  187. const layout = "2006-01-02T15:04:05.999999999" // remove the incorrect UTC tz info
  188. t := time.Unix(0, s.pack.Message.GetTimestamp())
  189. t = t.In(time.UTC)
  190. ct, _ := time.ParseInLocation(layout, t.Format(layout), s.tz)
  191. s.pack.Message.SetTimestamp(ct.UnixNano())
  192. s.pack.TrustMsgBytes = false
  193. }
  194. } else {
  195. s.pack.TrustMsgBytes = false
  196. s.pack.Message.SetPayload(payload)
  197. ptype, _ := message.NewField("payload_type", payload_type, "file-extension")
  198. s.pack.Message.AddField(ptype)
  199. pname, _ := message.NewField("payload_name", payload_name, "")
  200. s.pack.Message.AddField(pname)
  201. }
  202. if original != nil {
  203. // if future injections fail to set the standard headers, use the values
  204. // from the original message.
  205. if s.pack.Message.Uuid == nil {
  206. s.pack.Message.SetUuid(uuid.NewRandom()) // UUID should always be unique
  207. s.pack.TrustMsgBytes = false
  208. }
  209. if s.pack.Message.Timestamp == nil {
  210. s.pack.Message.SetTimestamp(original.GetTimestamp())
  211. s.pack.TrustMsgBytes = false
  212. }
  213. if s.pack.Message.Type == nil {
  214. s.pack.Message.SetType(original.GetType())
  215. s.pack.TrustMsgBytes = false
  216. }
  217. if s.pack.Message.Hostname == nil {
  218. s.pack.Message.SetHostname(original.GetHostname())
  219. s.pack.TrustMsgBytes = false
  220. }
  221. if s.pack.Message.Logger == nil {
  222. s.pack.Message.SetLogger(original.GetLogger())
  223. s.pack.TrustMsgBytes = false
  224. }
  225. if s.pack.Message.Severity == nil {
  226. s.pack.Message.SetSeverity(original.GetSeverity())
  227. s.pack.TrustMsgBytes = false
  228. }
  229. if s.pack.Message.Pid == nil {
  230. s.pack.Message.SetPid(original.GetPid())
  231. s.pack.TrustMsgBytes = false
  232. }
  233. }
  234. s.packs = append(s.packs, s.pack)
  235. s.pack = nil
  236. return 0
  237. })
  238. }
  239. func (s *SandboxDecoder) Shutdown() {
  240. err := s.destroy()
  241. if err != nil {
  242. s.dRunner.LogError(err)
  243. }
  244. }
  245. func (s *SandboxDecoder) destroy() error {
  246. s.reportLock.Lock()
  247. var err error
  248. if s.sb != nil {
  249. if s.sbc.PreserveData {
  250. err = s.sb.Destroy(s.preservationFile)
  251. } else {
  252. err = s.sb.Destroy("")
  253. }
  254. s.sb = nil
  255. }
  256. s.reportLock.Unlock()
  257. return err
  258. }
  259. func (s *SandboxDecoder) Decode(pack *pipeline.PipelinePack) (packs []*pipeline.PipelinePack,
  260. err error) {
  261. if s.sb == nil {
  262. err = fmt.Errorf("SandboxDecoder has been terminated")
  263. return
  264. }
  265. s.pack = pack
  266. atomic.AddInt64(&s.processMessageCount, 1)
  267. var startTime time.Time
  268. if s.sample {
  269. startTime = time.Now()
  270. }
  271. retval := s.sb.ProcessMessage(s.pack)
  272. if s.sample {
  273. duration := time.Since(startTime).Nanoseconds()
  274. s.reportLock.Lock()
  275. s.processMessageDuration += duration
  276. s.processMessageSamples++
  277. s.reportLock.Unlock()
  278. }
  279. s.sample = 0 == rand.Intn(s.sampleDenominator)
  280. if retval > 0 {
  281. err = fmt.Errorf("FATAL: %s", s.sb.LastError())
  282. s.dRunner.LogError(err)
  283. s.pConfig.Globals.ShutDown(1)
  284. }
  285. if retval < 0 {
  286. atomic.AddInt64(&s.processMessageFailures, 1)
  287. if s.pack != nil {
  288. err = fmt.Errorf("Failed parsing: %s payload: %s",
  289. s.sb.LastError(), s.pack.Message.GetPayload())
  290. } else {
  291. err = fmt.Errorf("Failed after a successful inject_message call: %s", s.sb.LastError())
  292. }
  293. if len(s.packs) > 1 {
  294. for _, p := range s.packs[1:] {
  295. p.Recycle(nil)
  296. }
  297. }
  298. s.packs = nil
  299. }
  300. if retval == 0 && s.pack != nil {
  301. // InjectMessage was never called, we're passing the original message
  302. // through.
  303. packs = append(packs, pack)
  304. s.pack = nil
  305. } else {
  306. packs = s.packs
  307. }
  308. s.packs = nil
  309. return packs, err
  310. }
  311. func (s *SandboxDecoder) EncodesMsgBytes() bool {
  312. return true
  313. }
  314. // Satisfies the `pipeline.ReportingPlugin` interface to provide sandbox state
  315. // information to the Heka report and dashboard.
  316. func (s *SandboxDecoder) ReportMsg(msg *message.Message) error {
  317. s.reportLock.Lock()
  318. defer s.reportLock.Unlock()
  319. if s.sb == nil {
  320. return fmt.Errorf("Decoder is not running")
  321. }
  322. message.NewIntField(msg, "Memory", int(s.sb.Usage(TYPE_MEMORY,
  323. STAT_CURRENT)), "B")
  324. message.NewIntField(msg, "MaxMemory", int(s.sb.Usage(TYPE_MEMORY,
  325. STAT_MAXIMUM)), "B")
  326. message.NewIntField(msg, "MaxInstructions", int(s.sb.Usage(
  327. TYPE_INSTRUCTIONS, STAT_MAXIMUM)), "count")
  328. message.NewIntField(msg, "MaxOutput", int(s.sb.Usage(TYPE_OUTPUT,
  329. STAT_MAXIMUM)), "B")
  330. message.NewInt64Field(msg, "ProcessMessageCount", atomic.LoadInt64(&s.processMessageCount), "count")
  331. message.NewInt64Field(msg, "ProcessMessageFailures", atomic.LoadInt64(&s.processMessageFailures), "count")
  332. message.NewInt64Field(msg, "ProcessMessageSamples", s.processMessageSamples, "count")
  333. var tmp int64 = 0
  334. if s.processMessageSamples > 0 {
  335. tmp = s.processMessageDuration / s.processMessageSamples
  336. }
  337. message.NewInt64Field(msg, "ProcessMessageAvgDuration", tmp, "ns")
  338. return nil
  339. }
  340. func init() {
  341. pipeline.RegisterPlugin("SandboxDecoder", func() interface{} {
  342. return new(SandboxDecoder)
  343. })
  344. pipeline.RegisterPlugin("SandboxFilter", func() interface{} {
  345. return new(SandboxFilter)
  346. })
  347. pipeline.RegisterPlugin("SandboxManagerFilter", func() interface{} {
  348. return new(SandboxManagerFilter)
  349. })
  350. }