PageRenderTime 1599ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/daemon/logger/splunk/splunk.go

https://github.com/dotcloud/docker
Go | 672 lines | 559 code | 77 blank | 36 comment | 132 complexity | b1a0f14f08306312822c0fd2c4cd84dc MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, Apache-2.0, BSD-2-Clause, CC-BY-4.0, 0BSD, CC-BY-SA-4.0, GPL-2.0, BSD-3-Clause, MIT
  1. // Package splunk provides the log driver for forwarding server logs to
  2. // Splunk HTTP Event Collector endpoint.
  3. package splunk // import "github.com/docker/docker/daemon/logger/splunk"
  4. import (
  5. "bytes"
  6. "compress/gzip"
  7. "context"
  8. "crypto/tls"
  9. "crypto/x509"
  10. "encoding/json"
  11. "fmt"
  12. "io"
  13. "net/http"
  14. "net/url"
  15. "os"
  16. "strconv"
  17. "strings"
  18. "sync"
  19. "time"
  20. "github.com/docker/docker/daemon/logger"
  21. "github.com/docker/docker/daemon/logger/loggerutils"
  22. "github.com/docker/docker/pkg/pools"
  23. "github.com/docker/docker/pkg/urlutil"
  24. "github.com/google/uuid"
  25. "github.com/sirupsen/logrus"
  26. )
// Option keys recognized by this driver (validated in ValidateLogOpt).
// driverName is the name the driver registers under; the splunk-* keys
// configure the HEC endpoint and payload, while env/labels/tag are the
// generic logging options shared with other log drivers.
const (
	driverName                    = "splunk"
	splunkURLKey                  = "splunk-url"
	splunkTokenKey                = "splunk-token"
	splunkSourceKey               = "splunk-source"
	splunkSourceTypeKey           = "splunk-sourcetype"
	splunkIndexKey                = "splunk-index"
	splunkCAPathKey               = "splunk-capath"
	splunkCANameKey               = "splunk-caname"
	splunkInsecureSkipVerifyKey   = "splunk-insecureskipverify"
	splunkFormatKey               = "splunk-format"
	splunkVerifyConnectionKey     = "splunk-verify-connection"
	splunkGzipCompressionKey      = "splunk-gzip"
	splunkGzipCompressionLevelKey = "splunk-gzip-level"
	splunkIndexAcknowledgment     = "splunk-index-acknowledgment"
	envKey                        = "env"
	envRegexKey                   = "env-regex"
	labelsKey                     = "labels"
	labelsRegexKey                = "labels-regex"
	tagKey                        = "tag"
)
// Batching defaults. Each of these can be overridden at daemon start via
// the SPLUNK_LOGGING_DRIVER_* environment variables declared below.
const (
	// How often do we send messages (if we are not reaching batch size)
	defaultPostMessagesFrequency = 5 * time.Second
	// How big can be batch of messages
	defaultPostMessagesBatchSize = 1000
	// Maximum number of messages we can store in buffer
	defaultBufferMaximum = 10 * defaultPostMessagesBatchSize
	// Number of messages allowed to be queued in the channel
	defaultStreamChannelSize = 4 * defaultPostMessagesBatchSize
	// maxResponseSize is the max amount that will be read from an http response
	maxResponseSize = 1024
)
// Environment variables that tune batching at daemon run time; parsed by
// getAdvancedOptionDuration / getAdvancedOptionInt, which fall back to the
// defaults above on absent or malformed values.
const (
	envVarPostMessagesFrequency = "SPLUNK_LOGGING_DRIVER_POST_MESSAGES_FREQUENCY"
	envVarPostMessagesBatchSize = "SPLUNK_LOGGING_DRIVER_POST_MESSAGES_BATCH_SIZE"
	envVarBufferMaximum         = "SPLUNK_LOGGING_DRIVER_BUFFER_MAX"
	envVarStreamChannelSize     = "SPLUNK_LOGGING_DRIVER_CHANNEL_SIZE"
)
  66. var batchSendTimeout = 30 * time.Second
// splunkLoggerInterface extends logger.Logger with the background worker
// goroutine that New starts for whichever format variant it constructs.
type splunkLoggerInterface interface {
	logger.Logger
	worker()
}
// splunkLogger is the shared core of all three format variants. It owns the
// HTTP client used to reach the HEC endpoint and the channel/lock state that
// coordinates Log callers, the background worker, and Close.
type splunkLogger struct {
	client    *http.Client
	transport *http.Transport // retained so worker can CloseIdleConnections on shutdown
	url       string          // HEC event-collector endpoint URL
	auth      string          // "Splunk <token>" Authorization header value

	// nullMessage is the prototype copied for every outgoing event
	// (see createSplunkMessage).
	nullMessage *splunkMessage

	// http compression
	gzipCompression      bool
	gzipCompressionLevel int

	// Advanced options
	postMessagesFrequency time.Duration
	postMessagesBatchSize int
	bufferMaximum         int
	indexAck              bool // send X-Splunk-Request-Channel header when true

	// For synchronization between background worker and logger.
	// We use channel to send messages to worker go routine.
	// All other variables for blocking Close call before we flush all messages to HEC
	stream     chan *splunkMessage
	lock       sync.RWMutex
	closed     bool       // set by worker after the final flush completes
	closedCond *sync.Cond // non-nil once Close has started; signalled by worker
}
// splunkLoggerInline wraps each log line in a splunkMessageEvent
// (the default "inline" format).
type splunkLoggerInline struct {
	*splunkLogger
	nullEvent *splunkMessageEvent
}

// splunkLoggerJSON behaves like the inline variant, but when a log line is
// itself valid JSON it is embedded unquoted ("json" format).
type splunkLoggerJSON struct {
	*splunkLoggerInline
}

// splunkLoggerRaw sends the raw line, prefixed with the pre-rendered
// tag/attribute bytes ("raw" format).
type splunkLoggerRaw struct {
	*splunkLogger
	prefix []byte
}
// splunkMessage is the JSON envelope posted to the HEC event endpoint.
type splunkMessage struct {
	Event      interface{} `json:"event"`
	Time       string      `json:"time"` // epoch seconds with fractional part; filled by createSplunkMessage
	Host       string      `json:"host"`
	Source     string      `json:"source,omitempty"`
	SourceType string      `json:"sourcetype,omitempty"`
	Index      string      `json:"index,omitempty"`
}

// splunkMessageEvent is the event payload used by the inline and json
// formats: the log line plus its source stream, tag, and extra attributes.
type splunkMessageEvent struct {
	Line   interface{}       `json:"line"`
	Source string            `json:"source"`
	Tag    string            `json:"tag,omitempty"`
	Attrs  map[string]string `json:"attrs,omitempty"`
}
// Values accepted for the splunk-format option.
const (
	splunkFormatRaw    = "raw"
	splunkFormatJSON   = "json"
	splunkFormatInline = "inline"
)
  123. func init() {
  124. if err := logger.RegisterLogDriver(driverName, New); err != nil {
  125. logrus.Fatal(err)
  126. }
  127. if err := logger.RegisterLogOptValidator(driverName, ValidateLogOpt); err != nil {
  128. logrus.Fatal(err)
  129. }
  130. }
  131. // New creates splunk logger driver using configuration passed in context
  132. func New(info logger.Info) (logger.Logger, error) {
  133. hostname, err := info.Hostname()
  134. if err != nil {
  135. return nil, fmt.Errorf("%s: cannot access hostname to set source field", driverName)
  136. }
  137. // Parse and validate Splunk URL
  138. splunkURL, err := parseURL(info)
  139. if err != nil {
  140. return nil, err
  141. }
  142. // Splunk Token is required parameter
  143. splunkToken, ok := info.Config[splunkTokenKey]
  144. if !ok {
  145. return nil, fmt.Errorf("%s: %s is expected", driverName, splunkTokenKey)
  146. }
  147. // FIXME set minimum TLS version for splunk (see https://github.com/moby/moby/issues/42443)
  148. tlsConfig := &tls.Config{} //nolint: gosec // G402: TLS MinVersion too low.
  149. // Splunk is using autogenerated certificates by default,
  150. // allow users to trust them with skipping verification
  151. if insecureSkipVerifyStr, ok := info.Config[splunkInsecureSkipVerifyKey]; ok {
  152. insecureSkipVerify, err := strconv.ParseBool(insecureSkipVerifyStr)
  153. if err != nil {
  154. return nil, err
  155. }
  156. tlsConfig.InsecureSkipVerify = insecureSkipVerify
  157. }
  158. // If path to the root certificate is provided - load it
  159. if caPath, ok := info.Config[splunkCAPathKey]; ok {
  160. caCert, err := os.ReadFile(caPath)
  161. if err != nil {
  162. return nil, err
  163. }
  164. caPool := x509.NewCertPool()
  165. caPool.AppendCertsFromPEM(caCert)
  166. tlsConfig.RootCAs = caPool
  167. }
  168. if caName, ok := info.Config[splunkCANameKey]; ok {
  169. tlsConfig.ServerName = caName
  170. }
  171. gzipCompression := false
  172. if gzipCompressionStr, ok := info.Config[splunkGzipCompressionKey]; ok {
  173. gzipCompression, err = strconv.ParseBool(gzipCompressionStr)
  174. if err != nil {
  175. return nil, err
  176. }
  177. }
  178. gzipCompressionLevel := gzip.DefaultCompression
  179. if gzipCompressionLevelStr, ok := info.Config[splunkGzipCompressionLevelKey]; ok {
  180. var err error
  181. gzipCompressionLevel64, err := strconv.ParseInt(gzipCompressionLevelStr, 10, 32)
  182. if err != nil {
  183. return nil, err
  184. }
  185. gzipCompressionLevel = int(gzipCompressionLevel64)
  186. if gzipCompressionLevel < gzip.DefaultCompression || gzipCompressionLevel > gzip.BestCompression {
  187. err := fmt.Errorf("not supported level '%s' for %s (supported values between %d and %d)",
  188. gzipCompressionLevelStr, splunkGzipCompressionLevelKey, gzip.DefaultCompression, gzip.BestCompression)
  189. return nil, err
  190. }
  191. }
  192. indexAck := false
  193. if indexAckStr, ok := info.Config[splunkIndexAcknowledgment]; ok {
  194. indexAck, err = strconv.ParseBool(indexAckStr)
  195. if err != nil {
  196. return nil, err
  197. }
  198. }
  199. transport := &http.Transport{
  200. TLSClientConfig: tlsConfig,
  201. Proxy: http.ProxyFromEnvironment,
  202. }
  203. client := &http.Client{
  204. Transport: transport,
  205. }
  206. source := info.Config[splunkSourceKey]
  207. sourceType := info.Config[splunkSourceTypeKey]
  208. index := info.Config[splunkIndexKey]
  209. var nullMessage = &splunkMessage{
  210. Host: hostname,
  211. Source: source,
  212. SourceType: sourceType,
  213. Index: index,
  214. }
  215. // Allow user to remove tag from the messages by setting tag to empty string
  216. tag := ""
  217. if tagTemplate, ok := info.Config[tagKey]; !ok || tagTemplate != "" {
  218. tag, err = loggerutils.ParseLogTag(info, loggerutils.DefaultTemplate)
  219. if err != nil {
  220. return nil, err
  221. }
  222. }
  223. attrs, err := info.ExtraAttributes(nil)
  224. if err != nil {
  225. return nil, err
  226. }
  227. var (
  228. postMessagesFrequency = getAdvancedOptionDuration(envVarPostMessagesFrequency, defaultPostMessagesFrequency)
  229. postMessagesBatchSize = getAdvancedOptionInt(envVarPostMessagesBatchSize, defaultPostMessagesBatchSize)
  230. bufferMaximum = getAdvancedOptionInt(envVarBufferMaximum, defaultBufferMaximum)
  231. streamChannelSize = getAdvancedOptionInt(envVarStreamChannelSize, defaultStreamChannelSize)
  232. )
  233. logger := &splunkLogger{
  234. client: client,
  235. transport: transport,
  236. url: splunkURL.String(),
  237. auth: "Splunk " + splunkToken,
  238. nullMessage: nullMessage,
  239. gzipCompression: gzipCompression,
  240. gzipCompressionLevel: gzipCompressionLevel,
  241. stream: make(chan *splunkMessage, streamChannelSize),
  242. postMessagesFrequency: postMessagesFrequency,
  243. postMessagesBatchSize: postMessagesBatchSize,
  244. bufferMaximum: bufferMaximum,
  245. indexAck: indexAck,
  246. }
  247. // By default we verify connection, but we allow use to skip that
  248. verifyConnection := true
  249. if verifyConnectionStr, ok := info.Config[splunkVerifyConnectionKey]; ok {
  250. var err error
  251. verifyConnection, err = strconv.ParseBool(verifyConnectionStr)
  252. if err != nil {
  253. return nil, err
  254. }
  255. }
  256. if verifyConnection {
  257. err = verifySplunkConnection(logger)
  258. if err != nil {
  259. return nil, err
  260. }
  261. }
  262. var splunkFormat string
  263. if splunkFormatParsed, ok := info.Config[splunkFormatKey]; ok {
  264. switch splunkFormatParsed {
  265. case splunkFormatInline:
  266. case splunkFormatJSON:
  267. case splunkFormatRaw:
  268. default:
  269. return nil, fmt.Errorf("Unknown format specified %s, supported formats are inline, json and raw", splunkFormat)
  270. }
  271. splunkFormat = splunkFormatParsed
  272. } else {
  273. splunkFormat = splunkFormatInline
  274. }
  275. var loggerWrapper splunkLoggerInterface
  276. switch splunkFormat {
  277. case splunkFormatInline:
  278. nullEvent := &splunkMessageEvent{
  279. Tag: tag,
  280. Attrs: attrs,
  281. }
  282. loggerWrapper = &splunkLoggerInline{logger, nullEvent}
  283. case splunkFormatJSON:
  284. nullEvent := &splunkMessageEvent{
  285. Tag: tag,
  286. Attrs: attrs,
  287. }
  288. loggerWrapper = &splunkLoggerJSON{&splunkLoggerInline{logger, nullEvent}}
  289. case splunkFormatRaw:
  290. var prefix bytes.Buffer
  291. if tag != "" {
  292. prefix.WriteString(tag)
  293. prefix.WriteString(" ")
  294. }
  295. for key, value := range attrs {
  296. prefix.WriteString(key)
  297. prefix.WriteString("=")
  298. prefix.WriteString(value)
  299. prefix.WriteString(" ")
  300. }
  301. loggerWrapper = &splunkLoggerRaw{logger, prefix.Bytes()}
  302. default:
  303. return nil, fmt.Errorf("Unexpected format %s", splunkFormat)
  304. }
  305. go loggerWrapper.worker()
  306. return loggerWrapper, nil
  307. }
  308. func (l *splunkLoggerInline) Log(msg *logger.Message) error {
  309. message := l.createSplunkMessage(msg)
  310. event := *l.nullEvent
  311. event.Line = string(msg.Line)
  312. event.Source = msg.Source
  313. message.Event = &event
  314. logger.PutMessage(msg)
  315. return l.queueMessageAsync(message)
  316. }
  317. func (l *splunkLoggerJSON) Log(msg *logger.Message) error {
  318. message := l.createSplunkMessage(msg)
  319. event := *l.nullEvent
  320. var rawJSONMessage json.RawMessage
  321. if err := json.Unmarshal(msg.Line, &rawJSONMessage); err == nil {
  322. event.Line = &rawJSONMessage
  323. } else {
  324. event.Line = string(msg.Line)
  325. }
  326. event.Source = msg.Source
  327. message.Event = &event
  328. logger.PutMessage(msg)
  329. return l.queueMessageAsync(message)
  330. }
  331. func (l *splunkLoggerRaw) Log(msg *logger.Message) error {
  332. // empty or whitespace-only messages are not accepted by HEC
  333. if strings.TrimSpace(string(msg.Line)) == "" {
  334. return nil
  335. }
  336. message := l.createSplunkMessage(msg)
  337. message.Event = string(append(l.prefix, msg.Line...))
  338. logger.PutMessage(msg)
  339. return l.queueMessageAsync(message)
  340. }
  341. func (l *splunkLogger) queueMessageAsync(message *splunkMessage) error {
  342. l.lock.RLock()
  343. defer l.lock.RUnlock()
  344. if l.closedCond != nil {
  345. return fmt.Errorf("%s: driver is closed", driverName)
  346. }
  347. l.stream <- message
  348. return nil
  349. }
  350. func (l *splunkLogger) worker() {
  351. timer := time.NewTicker(l.postMessagesFrequency)
  352. var messages []*splunkMessage
  353. for {
  354. select {
  355. case message, open := <-l.stream:
  356. if !open {
  357. l.postMessages(messages, true)
  358. l.lock.Lock()
  359. defer l.lock.Unlock()
  360. l.transport.CloseIdleConnections()
  361. l.closed = true
  362. l.closedCond.Signal()
  363. return
  364. }
  365. messages = append(messages, message)
  366. // Only sending when we get exactly to the batch size,
  367. // This also helps not to fire postMessages on every new message,
  368. // when previous try failed.
  369. if len(messages)%l.postMessagesBatchSize == 0 {
  370. messages = l.postMessages(messages, false)
  371. }
  372. case <-timer.C:
  373. messages = l.postMessages(messages, false)
  374. }
  375. }
  376. }
// postMessages sends the buffered messages to HEC in windows of
// postMessagesBatchSize and returns the suffix of messages that could not
// be delivered (empty slice on full success). When the unsent backlog
// reaches bufferMaximum — or when lastChance is true (the final flush from
// worker during Close) — undeliverable messages are dumped to the daemon
// log instead of being retained. One context bounded by batchSendTimeout
// covers all batches of this call.
func (l *splunkLogger) postMessages(messages []*splunkMessage, lastChance bool) []*splunkMessage {
	messagesLen := len(messages)
	ctx, cancel := context.WithTimeout(context.Background(), batchSendTimeout)
	defer cancel()
	for i := 0; i < messagesLen; i += l.postMessagesBatchSize {
		// [i:upperBound] is the current batch window.
		upperBound := i + l.postMessagesBatchSize
		if upperBound > messagesLen {
			upperBound = messagesLen
		}
		if err := l.tryPostMessages(ctx, messages[i:upperBound]); err != nil {
			logrus.WithError(err).WithField("module", "logger/splunk").Warn("Error while sending logs")
			if messagesLen-i >= l.bufferMaximum || lastChance {
				// If this is last chance - print them all to the daemon log
				if lastChance {
					upperBound = messagesLen
				}
				// Not all sent, but buffer has got to its maximum, let's log all messages
				// we could not send and return buffer minus one batch size
				for j := i; j < upperBound; j++ {
					if jsonEvent, err := json.Marshal(messages[j]); err != nil {
						logrus.Error(err)
					} else {
						logrus.Error(fmt.Errorf("Failed to send a message '%s'", string(jsonEvent)))
					}
				}
				return messages[upperBound:messagesLen]
			}
			// Not all sent, returning buffer from where we have not sent messages
			return messages[i:messagesLen]
		}
	}
	// All sent, return empty buffer
	return messages[:0]
}
  411. func (l *splunkLogger) tryPostMessages(ctx context.Context, messages []*splunkMessage) error {
  412. if len(messages) == 0 {
  413. return nil
  414. }
  415. var buffer bytes.Buffer
  416. var writer io.Writer
  417. var gzipWriter *gzip.Writer
  418. var err error
  419. // If gzip compression is enabled - create gzip writer with specified compression
  420. // level. If gzip compression is disabled, use standard buffer as a writer
  421. if l.gzipCompression {
  422. gzipWriter, err = gzip.NewWriterLevel(&buffer, l.gzipCompressionLevel)
  423. if err != nil {
  424. return err
  425. }
  426. writer = gzipWriter
  427. } else {
  428. writer = &buffer
  429. }
  430. for _, message := range messages {
  431. jsonEvent, err := json.Marshal(message)
  432. if err != nil {
  433. return err
  434. }
  435. if _, err := writer.Write(jsonEvent); err != nil {
  436. return err
  437. }
  438. }
  439. // If gzip compression is enabled, tell it, that we are done
  440. if l.gzipCompression {
  441. err = gzipWriter.Close()
  442. if err != nil {
  443. return err
  444. }
  445. }
  446. req, err := http.NewRequest(http.MethodPost, l.url, bytes.NewBuffer(buffer.Bytes()))
  447. if err != nil {
  448. return err
  449. }
  450. req = req.WithContext(ctx)
  451. req.Header.Set("Authorization", l.auth)
  452. // Tell if we are sending gzip compressed body
  453. if l.gzipCompression {
  454. req.Header.Set("Content-Encoding", "gzip")
  455. }
  456. // Set the correct header if index acknowledgment is enabled
  457. if l.indexAck {
  458. requestChannel, err := uuid.NewRandom()
  459. if err != nil {
  460. return err
  461. }
  462. req.Header.Set("X-Splunk-Request-Channel", requestChannel.String())
  463. }
  464. resp, err := l.client.Do(req)
  465. if err != nil {
  466. return err
  467. }
  468. defer func() {
  469. pools.Copy(io.Discard, resp.Body)
  470. resp.Body.Close()
  471. }()
  472. if resp.StatusCode != http.StatusOK {
  473. rdr := io.LimitReader(resp.Body, maxResponseSize)
  474. body, err := io.ReadAll(rdr)
  475. if err != nil {
  476. return err
  477. }
  478. return fmt.Errorf("%s: failed to send event - %s - %s", driverName, resp.Status, string(body))
  479. }
  480. return nil
  481. }
// Close shuts the driver down. It closes the stream channel — which makes
// the worker flush its buffer and exit — then waits on closedCond until the
// worker has set l.closed. Assigning closedCond (under the write lock) is
// also the signal queueMessageAsync checks, so no sender can write to the
// now-closed channel. Calls after the first are no-ops returning nil.
func (l *splunkLogger) Close() error {
	l.lock.Lock()
	defer l.lock.Unlock()
	if l.closedCond == nil {
		l.closedCond = sync.NewCond(&l.lock)
		close(l.stream)
		// Wait releases the lock while blocked, letting the worker acquire
		// it to set closed and signal us.
		for !l.closed {
			l.closedCond.Wait()
		}
	}
	return nil
}
  494. func (l *splunkLogger) Name() string {
  495. return driverName
  496. }
  497. func (l *splunkLogger) createSplunkMessage(msg *logger.Message) *splunkMessage {
  498. message := *l.nullMessage
  499. message.Time = fmt.Sprintf("%f", float64(msg.Timestamp.UnixNano())/float64(time.Second))
  500. return &message
  501. }
  502. // ValidateLogOpt looks for all supported by splunk driver options
  503. func ValidateLogOpt(cfg map[string]string) error {
  504. for key := range cfg {
  505. switch key {
  506. case splunkURLKey:
  507. case splunkTokenKey:
  508. case splunkSourceKey:
  509. case splunkSourceTypeKey:
  510. case splunkIndexKey:
  511. case splunkCAPathKey:
  512. case splunkCANameKey:
  513. case splunkInsecureSkipVerifyKey:
  514. case splunkFormatKey:
  515. case splunkVerifyConnectionKey:
  516. case splunkGzipCompressionKey:
  517. case splunkGzipCompressionLevelKey:
  518. case splunkIndexAcknowledgment:
  519. case envKey:
  520. case envRegexKey:
  521. case labelsKey:
  522. case labelsRegexKey:
  523. case tagKey:
  524. default:
  525. return fmt.Errorf("unknown log opt '%s' for %s log driver", key, driverName)
  526. }
  527. }
  528. return nil
  529. }
  530. func parseURL(info logger.Info) (*url.URL, error) {
  531. splunkURLStr, ok := info.Config[splunkURLKey]
  532. if !ok {
  533. return nil, fmt.Errorf("%s: %s is expected", driverName, splunkURLKey)
  534. }
  535. splunkURL, err := url.Parse(splunkURLStr)
  536. if err != nil {
  537. return nil, fmt.Errorf("%s: failed to parse %s as url value in %s", driverName, splunkURLStr, splunkURLKey)
  538. }
  539. if !urlutil.IsURL(splunkURLStr) ||
  540. !splunkURL.IsAbs() ||
  541. (splunkURL.Path != "" && splunkURL.Path != "/") ||
  542. splunkURL.RawQuery != "" ||
  543. splunkURL.Fragment != "" {
  544. return nil, fmt.Errorf("%s: expected format scheme://dns_name_or_ip:port for %s", driverName, splunkURLKey)
  545. }
  546. splunkURL.Path = "/services/collector/event/1.0"
  547. return splunkURL, nil
  548. }
  549. func verifySplunkConnection(l *splunkLogger) error {
  550. req, err := http.NewRequest(http.MethodOptions, l.url, nil)
  551. if err != nil {
  552. return err
  553. }
  554. resp, err := l.client.Do(req)
  555. if err != nil {
  556. return err
  557. }
  558. defer func() {
  559. pools.Copy(io.Discard, resp.Body)
  560. resp.Body.Close()
  561. }()
  562. if resp.StatusCode != http.StatusOK {
  563. rdr := io.LimitReader(resp.Body, maxResponseSize)
  564. body, err := io.ReadAll(rdr)
  565. if err != nil {
  566. return err
  567. }
  568. return fmt.Errorf("%s: failed to verify connection - %s - %s", driverName, resp.Status, string(body))
  569. }
  570. return nil
  571. }
  572. func getAdvancedOptionDuration(envName string, defaultValue time.Duration) time.Duration {
  573. valueStr := os.Getenv(envName)
  574. if valueStr == "" {
  575. return defaultValue
  576. }
  577. parsedValue, err := time.ParseDuration(valueStr)
  578. if err != nil {
  579. logrus.Error(fmt.Sprintf("Failed to parse value of %s as duration. Using default %v. %v", envName, defaultValue, err))
  580. return defaultValue
  581. }
  582. return parsedValue
  583. }
  584. func getAdvancedOptionInt(envName string, defaultValue int) int {
  585. valueStr := os.Getenv(envName)
  586. if valueStr == "" {
  587. return defaultValue
  588. }
  589. parsedValue, err := strconv.ParseInt(valueStr, 10, 32)
  590. if err != nil {
  591. logrus.Error(fmt.Sprintf("Failed to parse value of %s as integer. Using default %d. %v", envName, defaultValue, err))
  592. return defaultValue
  593. }
  594. return int(parsedValue)
  595. }