PageRenderTime 47ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/pipeline/splitter_runner_test.go

https://gitlab.com/wilane/heka
Go | 478 lines | 372 code | 70 blank | 36 comment | 25 complexity | 478150f36c9b70f9c6567e60eccf09a2 MD5 | raw file
  1. /***** BEGIN LICENSE BLOCK *****
  2. # This Source Code Form is subject to the terms of the Mozilla Public
  3. # License, v. 2.0. If a copy of the MPL was not distributed with this file,
  4. # You can obtain one at http://mozilla.org/MPL/2.0/.
  5. #
  6. # The Initial Developer of the Original Code is the Mozilla Foundation.
  7. # Portions created by the Initial Developer are Copyright (C) 2012-2015
  8. # the Initial Developer. All Rights Reserved.
  9. #
  10. # Contributor(s):
  11. # Mike Trinkala (trink@mozilla.com)
  12. # Mark Reid (mreid@mozilla.com)
  13. # Rob Miller (rmiller@mozilla.com)
  14. #
  15. # ***** END LICENSE BLOCK *****/
  16. package pipeline
  17. import (
  18. "bytes"
  19. "io"
  20. "io/ioutil"
  21. "path/filepath"
  22. "strings"
  23. "github.com/mozilla-services/heka/message"
  24. ts "github.com/mozilla-services/heka/pipeline/testsupport"
  25. "github.com/rafrombrc/gomock/gomock"
  26. gs "github.com/rafrombrc/gospec/src/gospec"
  27. )
  28. // Dummy reader that will return some data along with the EOF error.
  29. type MockDataReader struct {
  30. data []byte
  31. ptr int
  32. }
  33. func (d *MockDataReader) Read(p []byte) (n int, err error) {
  34. var start = d.ptr
  35. d.ptr += len(p)
  36. if d.ptr >= len(d.data) {
  37. d.ptr = len(d.data)
  38. copy(p, d.data[start:])
  39. return (d.ptr - start), io.EOF
  40. }
  41. copy(p, d.data[start:d.ptr])
  42. return (d.ptr - start), nil
  43. }
  44. func (d *MockDataReader) Append(p []byte) {
  45. newData := make([]byte, len(d.data)+len(p))
  46. copy(newData, d.data)
  47. copy(newData[len(d.data):], p)
  48. d.data = newData
  49. }
  50. func makeMockReader(data []byte) (d *MockDataReader) {
  51. d = new(MockDataReader)
  52. d.data = make([]byte, len(data))
  53. d.ptr = 0
  54. copy(d.data, data)
  55. return
  56. }
  57. type MultiReadReader struct {
  58. data0 []byte
  59. data1 []byte
  60. ptr int
  61. firstDone bool
  62. }
  63. func (mr *MultiReadReader) read(p []byte, data []byte) (n int) {
  64. start := mr.ptr
  65. mr.ptr += len(p)
  66. if mr.ptr >= len(data) {
  67. mr.ptr = len(data)
  68. copy(p, data[start:])
  69. } else {
  70. copy(p, data[start:mr.ptr])
  71. }
  72. return mr.ptr - start
  73. }
  74. func (mr *MultiReadReader) Read(p []byte) (n int, err error) {
  75. if !mr.firstDone {
  76. n = mr.read(p, mr.data0)
  77. if mr.ptr >= len(mr.data0) {
  78. mr.firstDone = true
  79. mr.ptr = 0
  80. }
  81. return n, nil
  82. }
  83. // On second buffer now.
  84. n = mr.read(p, mr.data1)
  85. if mr.ptr >= len(mr.data1) {
  86. err = io.EOF
  87. }
  88. return n, err
  89. }
  90. func makeMultiReadReader(data []byte) (mr *MultiReadReader) {
  91. mr = &MultiReadReader{}
  92. idxHalf := len(data) / 2
  93. mr.data0 = make([]byte, idxHalf)
  94. mr.data1 = make([]byte, len(data)-idxHalf)
  95. mr.ptr = 0
  96. copy(mr.data0, data[:idxHalf])
  97. copy(mr.data1, data[idxHalf:])
  98. return
  99. }
  100. func readRecordsFromStream(sr *sRunner, reader io.Reader, getRemaining bool) (count int,
  101. errCount int, bytesRead int, foundEOFCount int, remainingDataLength int,
  102. finalRecordLength int, eofRecordLength int) {
  103. done := false
  104. for !done {
  105. n, record, err := (*sr).GetRecordFromStream(reader)
  106. if len(record) > 0 {
  107. count += 1
  108. finalRecordLength = len(record)
  109. }
  110. bytesRead += n
  111. if err != nil {
  112. if err == io.EOF {
  113. foundEOFCount = count
  114. eofRecordLength = len(record)
  115. if getRemaining {
  116. rem := (*sr).GetRemainingData()
  117. remainingDataLength = len(rem)
  118. }
  119. done = true
  120. } else {
  121. errCount++
  122. continue
  123. }
  124. }
  125. }
  126. return
  127. }
  128. func SplitterRunnerSpec(c gs.Context) {
  129. t := &ts.SimpleT{}
  130. ctrl := gomock.NewController(t)
  131. defer ctrl.Finish()
  132. srConfig := CommonSplitterConfig{}
  133. c.Specify("A SplitterRunner w/ HekaFramingSplitter", func() {
  134. splitter := &HekaFramingSplitter{}
  135. config := splitter.ConfigStruct().(*HekaFramingSplitterConfig)
  136. useMsgBytes := true
  137. srConfig.UseMsgBytes = &useMsgBytes
  138. sr := NewSplitterRunner("HekaFramingSplitter", splitter, srConfig)
  139. splitter.SetSplitterRunner(sr)
  140. err := splitter.Init(config)
  141. c.Assume(err, gs.IsNil)
  142. b, err := ioutil.ReadFile(filepath.Join(".", "testsupport", "multi.dat"))
  143. c.Assume(err, gs.IsNil)
  144. reader := makeMockReader(b)
  145. c.Specify("correctly handles data at EOF", func() {
  146. count, errCount, bytesRead, foundEOFCount,
  147. remainingDataLength, finalRecordLength,
  148. eofRecordLength := readRecordsFromStream(sr, reader, true)
  149. c.Expect(errCount, gs.Equals, 0)
  150. c.Expect(count, gs.Equals, 50)
  151. c.Expect(foundEOFCount, gs.Equals, 50)
  152. c.Expect(remainingDataLength, gs.Equals, 0)
  153. c.Expect(finalRecordLength, gs.Equals, 215)
  154. c.Expect(eofRecordLength, gs.Equals, 0)
  155. c.Expect(bytesRead, gs.Equals, len(b))
  156. })
  157. c.Specify("correctly splits & unframes a protobuf stream", func() {
  158. ir := NewMockInputRunner(ctrl)
  159. sr.SetInputRunner(ir)
  160. recycleChan := make(chan *PipelinePack, 1)
  161. pack := NewPipelinePack(recycleChan)
  162. recycleChan <- pack
  163. numRecs := 50
  164. ir.EXPECT().InChan().Times(numRecs).Return(recycleChan)
  165. delCall := ir.EXPECT().Deliver(pack).Times(numRecs)
  166. delCall.Do(func(pack *PipelinePack) {
  167. pack.Recycle(nil)
  168. })
  169. for err == nil {
  170. err = sr.SplitStream(reader, nil)
  171. }
  172. c.Expect(err, gs.Equals, io.EOF)
  173. })
  174. c.Specify("correctly handles appends after EOF", func() {
  175. half := len(b) / 2
  176. reader := makeMockReader(b[:half])
  177. totalBytesRead := 0
  178. count, errCount, bytesRead, foundEOFCount, _, finalRecordLength,
  179. eofRecordLength := readRecordsFromStream(sr, reader, false)
  180. totalBytesRead += bytesRead
  181. c.Expect(errCount, gs.Equals, 0)
  182. c.Expect(count, gs.Equals, 25)
  183. c.Expect(foundEOFCount, gs.Equals, 25)
  184. c.Expect(finalRecordLength, gs.Equals, 215)
  185. c.Expect(eofRecordLength, gs.Equals, 0)
  186. c.Expect(bytesRead <= half, gs.IsTrue)
  187. reader.Append(b[half:])
  188. count, errCount, bytesRead, foundEOFCount,
  189. remainingDataLength, finalRecordLength,
  190. eofRecordLength := readRecordsFromStream(sr, reader, true)
  191. totalBytesRead += bytesRead
  192. c.Expect(errCount, gs.Equals, 0)
  193. c.Expect(count, gs.Equals, 25)
  194. c.Expect(foundEOFCount, gs.Equals, 25)
  195. c.Expect(remainingDataLength, gs.Equals, 0)
  196. c.Expect(finalRecordLength, gs.Equals, 215)
  197. c.Expect(eofRecordLength, gs.Equals, 0)
  198. c.Expect(totalBytesRead, gs.Equals, len(b))
  199. })
  200. c.Specify("reuse on another stream without GetRemainingData", func() {
  201. // Test the case where we reuse the same SplitterRunner on
  202. // two different readers, and we do not call GetRemainingData before
  203. // using the second reader.
  204. half := len(b) / 2
  205. reader1 := makeMockReader(b[:half])
  206. count, errCount, bytesRead, foundEOFCount, _, finalRecordLength,
  207. eofRecordLength := readRecordsFromStream(sr, reader1, false)
  208. c.Expect(errCount, gs.Equals, 0)
  209. c.Expect(count, gs.Equals, 25)
  210. c.Expect(foundEOFCount, gs.Equals, 25)
  211. c.Expect(finalRecordLength, gs.Equals, 215)
  212. c.Expect(eofRecordLength, gs.Equals, 0)
  213. leftovers := half - bytesRead
  214. c.Expect(leftovers > 0, gs.IsTrue)
  215. reader2 := makeMockReader(b)
  216. // Don't call GetRemainingData before using sr on a new stream
  217. count, errCount, bytesRead, foundEOFCount, remainingDataLength, finalRecordLength,
  218. eofRecordLength := readRecordsFromStream(sr, reader2, true)
  219. c.Expect(errCount, gs.Equals, 0)
  220. c.Expect(count, gs.Equals, 50)
  221. c.Expect(foundEOFCount, gs.Equals, 50)
  222. c.Expect(remainingDataLength, gs.Equals, 0)
  223. c.Expect(finalRecordLength, gs.Equals, 215)
  224. c.Expect(eofRecordLength, gs.Equals, 0)
  225. // sr misreports the "remaining data" piece from reader1 as being
  226. // read from reader2
  227. c.Expect(bytesRead, gs.Equals, len(b)+leftovers)
  228. })
  229. c.Specify("reuse on another stream with reset", func() {
  230. // Test the case where we reuse the same SplitterRunner on
  231. // two different readers, but we call GetRemainingData before using
  232. // the second reader.
  233. half := len(b) / 2
  234. reader1 := makeMockReader(b[:half])
  235. count, errCount, bytesRead, foundEOFCount, _, finalRecordLength,
  236. eofRecordLength := readRecordsFromStream(sr, reader1, false)
  237. c.Expect(errCount, gs.Equals, 0)
  238. c.Expect(count, gs.Equals, 25)
  239. c.Expect(foundEOFCount, gs.Equals, 25)
  240. c.Expect(finalRecordLength, gs.Equals, 215)
  241. c.Expect(eofRecordLength, gs.Equals, 0)
  242. leftovers := half - bytesRead
  243. c.Expect(leftovers > 0, gs.IsTrue)
  244. reader2 := makeMockReader(b)
  245. // Call GetRemainingData before using sr on a new stream
  246. sr.GetRemainingData()
  247. count, errCount, bytesRead, foundEOFCount, remainingDataLength, finalRecordLength,
  248. eofRecordLength := readRecordsFromStream(sr, reader2, true)
  249. c.Expect(errCount, gs.Equals, 0)
  250. c.Expect(count, gs.Equals, 50)
  251. c.Expect(foundEOFCount, gs.Equals, 50)
  252. c.Expect(remainingDataLength, gs.Equals, 0)
  253. c.Expect(finalRecordLength, gs.Equals, 215)
  254. c.Expect(eofRecordLength, gs.Equals, 0)
  255. // Now we see the correct number of bytes being read.
  256. c.Expect(bytesRead, gs.Equals, len(b))
  257. })
  258. })
  259. c.Specify("A SplitterRunner w/ TokenSplitter", func() {
  260. splitter := &TokenSplitter{}
  261. config := splitter.ConfigStruct().(*TokenSplitterConfig)
  262. c.Specify("sets readPos to 0 when read returns ErrShortBuffer", func() {
  263. config.Delimiter = "\t"
  264. err := splitter.Init(config)
  265. c.Assume(err, gs.IsNil)
  266. sr := NewSplitterRunner("TokenSplitter", splitter, srConfig)
  267. b := make([]byte, message.MAX_RECORD_SIZE+1)
  268. reader := bytes.NewReader(b)
  269. var n int
  270. var record []byte
  271. for err == nil {
  272. n, record, err = sr.GetRecordFromStream(reader)
  273. }
  274. c.Expect(n, gs.Equals, int(message.MAX_RECORD_SIZE))
  275. c.Expect(len(record), gs.Equals, 0)
  276. c.Expect(err, gs.Equals, io.ErrShortBuffer)
  277. c.Expect(sr.readPos, gs.Equals, 0)
  278. c.Expect(sr.scanPos, gs.Equals, 0)
  279. })
  280. c.Specify("checks if splitter honors 'deliver_incomplete_final' setting", func() {
  281. config.Count = 4
  282. numRecs := 10
  283. err := splitter.Init(config)
  284. c.Assume(err, gs.IsNil)
  285. packSupply := make(chan *PipelinePack, 1)
  286. pack := NewPipelinePack(packSupply)
  287. packSupply <- pack
  288. ir := NewMockInputRunner(ctrl)
  289. // ir.EXPECT().InChan().Return(packSupply).Times(numRecs)
  290. // ir.EXPECT().Name().Return("foo").Times(numRecs)
  291. ir.EXPECT().InChan().Return(packSupply).AnyTimes()
  292. ir.EXPECT().Name().Return("foo").AnyTimes()
  293. incompleteFinal := true
  294. srConfig.IncompleteFinal = &incompleteFinal
  295. sr := NewSplitterRunner("TokenSplitter", splitter, srConfig)
  296. sr.ir = ir
  297. rExpected := []byte("test1\ntest12\ntest123\npartial\n")
  298. buf := bytes.Repeat(rExpected, numRecs)
  299. buf = buf[:len(buf)-1] // 40 lines separated by 39 newlines
  300. reader := bytes.NewReader(buf)
  301. mockDel := NewMockDeliverer(ctrl)
  302. delCall := mockDel.EXPECT().Deliver(gomock.Any()).AnyTimes()
  303. i := 0
  304. delCall.Do(func(pack *PipelinePack) {
  305. i++
  306. if i < numRecs {
  307. c.Expect(pack.Message.GetPayload(), gs.Equals, string(rExpected))
  308. } else {
  309. c.Expect(pack.Message.GetPayload(), gs.Equals,
  310. string(rExpected[:len(rExpected)-1]))
  311. }
  312. pack.Recycle(nil)
  313. })
  314. c.Specify("via SplitStream", func() {
  315. for err == nil {
  316. err = sr.SplitStream(reader, mockDel)
  317. }
  318. c.Expect(err, gs.Equals, io.EOF)
  319. c.Expect(i, gs.Equals, numRecs)
  320. })
  321. c.Specify("via SplitBytes", func() {
  322. seekPos, err := sr.SplitBytes(buf, mockDel)
  323. c.Assume(err, gs.IsNil)
  324. c.Expect(seekPos, gs.Equals, len(buf))
  325. c.Expect(i, gs.Equals, numRecs)
  326. })
  327. })
  328. })
  329. c.Specify("A SplitterRunner w/ NullSplitter", func() {
  330. splitter := &NullSplitter{}
  331. config := struct{}{}
  332. c.Specify("reads to EOF when 'ToEOF' call is used", func() {
  333. err := splitter.Init(config)
  334. c.Assume(err, gs.IsNil)
  335. // Create SplitterRunner w/ mock InputRunner
  336. sr := NewSplitterRunner("TokenSplitter", splitter, srConfig)
  337. ir := NewMockInputRunner(ctrl)
  338. sr.SetInputRunner(ir)
  339. recycleChan := make(chan *PipelinePack, 1)
  340. pack := NewPipelinePack(recycleChan)
  341. recycleChan <- pack
  342. ir.EXPECT().InChan().Return(recycleChan)
  343. ir.EXPECT().Name().Return("InputRunnerName")
  344. // Create reader that will always require multiple reads.
  345. s := "0123456789"
  346. b := bytes.Repeat([]byte(s), 100)
  347. reader := makeMultiReadReader(b)
  348. // Set up deliverer that will return the pack back to us.
  349. delChan := make(chan *PipelinePack, 1)
  350. delFunc := func(pack *PipelinePack) {
  351. delChan <- pack
  352. }
  353. del := &deliverer{
  354. deliver: delFunc,
  355. }
  356. errChan := make(chan error, 1)
  357. go func() {
  358. err := sr.SplitStreamNullSplitterToEOF(reader, del)
  359. errChan <- err
  360. }()
  361. pack = <-delChan
  362. c.Expect(pack.Message.GetPayload(), gs.Equals, string(b))
  363. err = <-errChan
  364. c.Expect(err, gs.Equals, io.EOF)
  365. })
  366. c.Specify("only reads once when regular `SplitStream` call is used", func() {
  367. err := splitter.Init(config)
  368. c.Assume(err, gs.IsNil)
  369. // Create SplitterRunner w/ mock InputRunner
  370. sr := NewSplitterRunner("TokenSplitter", splitter, srConfig)
  371. ir := NewMockInputRunner(ctrl)
  372. sr.SetInputRunner(ir)
  373. recycleChan := make(chan *PipelinePack, 2)
  374. pack0 := NewPipelinePack(recycleChan)
  375. pack1 := NewPipelinePack(recycleChan)
  376. recycleChan <- pack0
  377. recycleChan <- pack1
  378. ir.EXPECT().InChan().Return(recycleChan).Times(2)
  379. ir.EXPECT().Name().Return("InputRunnerName").Times(2)
  380. // Create reader that will always require multiple reads.
  381. s := "0123456789"
  382. b := make([]byte, 1000)
  383. copy(b[:500], strings.Repeat(s, 50))
  384. copy(b[500:510], "FFFFFFFFFF") // So the first and second half aren't identical.
  385. copy(b[510:], strings.Repeat(s, 49))
  386. reader := makeMultiReadReader(b)
  387. // Set up deliverer that will return the pack back to us.
  388. delChan := make(chan *PipelinePack, 1)
  389. delFunc := func(pack *PipelinePack) {
  390. delChan <- pack
  391. }
  392. del := &deliverer{
  393. deliver: delFunc,
  394. }
  395. errChan := make(chan error, 1)
  396. go func() {
  397. err := sr.SplitStream(reader, del)
  398. errChan <- err
  399. }()
  400. pack := <-delChan
  401. c.Expect(pack.Message.GetPayload(), gs.Equals, string(b)[:500])
  402. pack = <-delChan
  403. c.Expect(pack.Message.GetPayload(), gs.Equals, string(b)[500:])
  404. err = <-errChan
  405. c.Expect(err, gs.Equals, io.EOF)
  406. })
  407. })
  408. }