PageRenderTime 53ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 1ms

/util-logging/src/main/scala/com/twitter/logging/ScribeHandler.scala

http://github.com/twitter/util
Scala | 448 lines | 335 code | 59 blank | 54 comment | 22 complexity | 779343af7bb015523fb7633f99448981 MD5 | raw file
Possible License(s): Apache-2.0
  1. /*
  2. * Copyright 2010 Twitter, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License"); you may
  5. * not use this file except in compliance with the License. You may obtain
  6. * a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package com.twitter.logging
  17. import java.io.IOException
  18. import java.net._
  19. import java.nio.{ByteBuffer, ByteOrder}
  20. import java.util.concurrent.atomic.AtomicLong
  21. import java.util.concurrent.{ArrayBlockingQueue, LinkedBlockingQueue, TimeUnit, ThreadPoolExecutor}
  22. import java.util.{Arrays, logging => javalog}
  23. import com.twitter.concurrent.NamedPoolThreadFactory
  24. import com.twitter.conversions.string._
  25. import com.twitter.conversions.time._
  26. import com.twitter.finagle.stats.{NullStatsReceiver, StatsReceiver}
  27. import com.twitter.io.Charsets
  28. import com.twitter.util.{Duration, NonFatal, Time}
  29. object ScribeHandler {
  30. private sealed trait ServerType
  31. private case object Unknown extends ServerType
  32. private case object Archaic extends ServerType
  33. private case object Modern extends ServerType
  34. val OK = 0
  35. val TRY_LATER = 1
  36. val DefaultHostname = "localhost"
  37. val DefaultPort = 1463
  38. val DefaultCategory = "scala"
  39. val DefaultBufferTime = 100.milliseconds
  40. val DefaultConnectBackoff = 15.seconds
  41. val DefaultMaxMessagesPerTransaction = 1000
  42. val DefaultMaxMessagesToBuffer = 10000
  43. val DefaultStatsReportPeriod = 5.minutes
  44. val log = Logger.get(getClass)
  45. /**
  46. * Generates a HandlerFactory that returns a ScribeHandler.
  47. * NOTE: ScribeHandler is usually used to write structured binary data.
  48. * When used in this way, wrapping this in other handlers, such as ThrottledHandler,
  49. * which emit plain-text messages into the log, will corrupt the resulting data.
  50. *
  51. * @param bufferTime
  52. * send a scribe message no more frequently than this:
  53. *
  54. * @param connectBackoff
  55. * don't connect more frequently than this (when the scribe server is down):
  56. */
  57. def apply(
  58. hostname: String = DefaultHostname,
  59. port: Int = DefaultPort,
  60. category: String = DefaultCategory,
  61. bufferTime: Duration = DefaultBufferTime,
  62. connectBackoff: Duration = DefaultConnectBackoff,
  63. maxMessagesPerTransaction: Int = DefaultMaxMessagesPerTransaction,
  64. maxMessagesToBuffer: Int = DefaultMaxMessagesToBuffer,
  65. formatter: Formatter = new Formatter(),
  66. level: Option[Level] = None,
  67. statsReceiver: StatsReceiver = NullStatsReceiver
  68. ) =
  69. () => new ScribeHandler(
  70. hostname,
  71. port,
  72. category,
  73. bufferTime,
  74. connectBackoff,
  75. maxMessagesPerTransaction,
  76. maxMessagesToBuffer,
  77. formatter,
  78. level,
  79. statsReceiver)
  80. def apply(
  81. hostname: String,
  82. port: Int,
  83. category: String,
  84. bufferTime: Duration,
  85. connectBackoff: Duration,
  86. maxMessagesPerTransaction: Int,
  87. maxMessagesToBuffer: Int,
  88. formatter: Formatter,
  89. level: Option[Level]
  90. ): () => ScribeHandler = apply(
  91. hostname,
  92. port,
  93. category,
  94. bufferTime,
  95. connectBackoff,
  96. maxMessagesPerTransaction,
  97. maxMessagesToBuffer,
  98. formatter,
  99. level,
  100. NullStatsReceiver)
  101. }
  102. /**
  103. * NOTE: ScribeHandler is usually used to write structured binary data.
  104. * When used in this way, wrapping this in other handlers, such as ThrottledHandler,
  105. * which emit plain-text messages into the log, will corrupt the resulting data.
  106. */
  107. class ScribeHandler(
  108. hostname: String,
  109. port: Int,
  110. category: String,
  111. bufferTime: Duration,
  112. connectBackoff: Duration,
  113. maxMessagesPerTransaction: Int,
  114. maxMessagesToBuffer: Int,
  115. formatter: Formatter,
  116. level: Option[Level],
  117. statsReceiver: StatsReceiver)
  118. extends Handler(formatter, level) {
  119. import ScribeHandler._
  120. def this(
  121. hostname: String,
  122. port: Int,
  123. category: String,
  124. bufferTime: Duration,
  125. connectBackoff: Duration,
  126. maxMessagesPerTransaction: Int,
  127. maxMessagesToBuffer: Int,
  128. formatter: Formatter,
  129. level: Option[Level]
  130. ) = this(hostname, port, category, bufferTime, connectBackoff,
  131. maxMessagesPerTransaction, maxMessagesToBuffer, formatter, level, NullStatsReceiver)
  132. private[this] val stats = new ScribeHandlerStats(statsReceiver)
  133. // it may be necessary to log errors here if scribe is down:
  134. private val loggerName = getClass.toString
  135. private var lastConnectAttempt = Time.epoch
  136. @volatile private var _lastTransmission = Time.epoch
  137. // visible for testing
  138. private[logging] def updateLastTransmission(): Unit = _lastTransmission = Time.now
  139. private var socket: Option[Socket] = None
  140. private var serverType: ServerType = Unknown
  141. private def isArchaicServer() = { serverType == Archaic }
  142. // Could be rewritten using a simple Condition (await/notify) or producer/consumer
  143. // with timed batching
  144. private[logging] val flusher = {
  145. val threadFactory = new NamedPoolThreadFactory("ScribeFlusher-" + category, true)
  146. // should be 1, but this is a crude form of retry
  147. val queue = new ArrayBlockingQueue[Runnable](5)
  148. val rejectionHandler = new ThreadPoolExecutor.DiscardPolicy()
  149. new ThreadPoolExecutor(1, 1, 0L, TimeUnit.MILLISECONDS, queue, threadFactory, rejectionHandler)
  150. }
  151. private[logging] val queue = new LinkedBlockingQueue[Array[Byte]](maxMessagesToBuffer)
  152. def queueSize: Int = queue.size()
  153. override def flush() {
  154. def connect() {
  155. if (!socket.isDefined) {
  156. if (Time.now.since(lastConnectAttempt) > connectBackoff) {
  157. try {
  158. lastConnectAttempt = Time.now
  159. socket = Some(new Socket(hostname, port))
  160. stats.incrConnection()
  161. serverType = Unknown
  162. } catch {
  163. case e: Exception =>
  164. log.error("Unable to open socket to scribe server at %s:%d: %s", hostname, port, e)
  165. stats.incrConnectionFailure()
  166. }
  167. } else {
  168. stats.incrConnectionSkipped()
  169. }
  170. }
  171. }
  172. def detectArchaicServer(): Unit = {
  173. if (serverType != Unknown) return
  174. serverType = socket match {
  175. case None => Unknown
  176. case Some(s) => {
  177. val outStream = s.getOutputStream()
  178. try {
  179. val fakeMessageWithOldScribePrefix: Array[Byte] = {
  180. val prefix = OLD_SCRIBE_PREFIX
  181. val messageSize = prefix.length + 5
  182. val buffer = ByteBuffer.wrap(new Array[Byte](messageSize + 4))
  183. buffer.order(ByteOrder.BIG_ENDIAN)
  184. buffer.putInt(messageSize)
  185. buffer.put(prefix)
  186. buffer.putInt(0)
  187. buffer.put(0: Byte)
  188. buffer.array
  189. }
  190. outStream.write(fakeMessageWithOldScribePrefix)
  191. readResponseExpecting(s, OLD_SCRIBE_REPLY)
  192. // Didn't get exception, so the server must be archaic.
  193. log.debug("Scribe server is archaic; changing to old protocol for future requests.")
  194. Archaic
  195. } catch {
  196. case NonFatal(_) => Modern
  197. }
  198. }
  199. }
  200. }
  201. // TODO we should send any remaining messages before the app terminates
  202. def sendBatch() {
  203. synchronized {
  204. connect()
  205. detectArchaicServer()
  206. socket.foreach { s =>
  207. val outStream = s.getOutputStream()
  208. var remaining = queue.size
  209. // try to send the log records in batch
  210. // need to check if socket is closed due to exception
  211. while (remaining > 0 && socket.isDefined) {
  212. val count = maxMessagesPerTransaction min remaining
  213. val buffer = makeBuffer(count)
  214. var offset = 0
  215. try {
  216. outStream.write(buffer.array)
  217. val expectedReply = if (isArchaicServer()) OLD_SCRIBE_REPLY else SCRIBE_REPLY
  218. readResponseExpecting(s, expectedReply)
  219. stats.incrSentRecords(count)
  220. remaining -= count
  221. } catch {
  222. case e: Exception =>
  223. stats.incrDroppedRecords(count)
  224. log.error(e, "Failed to send %s %d log entries to scribe server at %s:%d",
  225. category, count, hostname, port)
  226. closeSocket()
  227. }
  228. }
  229. updateLastTransmission()
  230. }
  231. stats.log()
  232. }
  233. }
  234. def readResponseExpecting(socket: Socket, expectedReply: Array[Byte]): Unit = {
  235. var offset = 0
  236. val inStream = socket.getInputStream()
  237. // read response:
  238. val response = new Array[Byte](expectedReply.length)
  239. while (offset < response.length) {
  240. val n = inStream.read(response, offset, response.length - offset)
  241. if (n < 0) {
  242. throw new IOException("End of stream")
  243. }
  244. offset += n
  245. }
  246. if (!Arrays.equals(response, expectedReply)) {
  247. throw new IOException("Error response from scribe server: " + response.hexlify)
  248. }
  249. }
  250. flusher.execute( new Runnable {
  251. def run() { sendBatch() }
  252. })
  253. }
  254. // should be private, make it visible to tests
  255. private[logging] def makeBuffer(count: Int): ByteBuffer = {
  256. val texts = for (i <- 0 until count) yield queue.poll()
  257. val recordHeader = ByteBuffer.wrap(new Array[Byte](10 + category.length))
  258. recordHeader.order(ByteOrder.BIG_ENDIAN)
  259. recordHeader.put(11: Byte)
  260. recordHeader.putShort(1)
  261. recordHeader.putInt(category.length)
  262. recordHeader.put(category.getBytes(Charsets.Iso8859_1))
  263. recordHeader.put(11: Byte)
  264. recordHeader.putShort(2)
  265. val prefix = if (isArchaicServer()) OLD_SCRIBE_PREFIX else SCRIBE_PREFIX
  266. val messageSize = (count * (recordHeader.capacity + 5)) + texts.foldLeft(0) { _ + _.length } + prefix.length + 5
  267. val buffer = ByteBuffer.wrap(new Array[Byte](messageSize + 4))
  268. buffer.order(ByteOrder.BIG_ENDIAN)
  269. // "framing":
  270. buffer.putInt(messageSize)
  271. buffer.put(prefix)
  272. buffer.putInt(count)
  273. for (text <- texts) {
  274. buffer.put(recordHeader.array)
  275. buffer.putInt(text.length)
  276. buffer.put(text)
  277. buffer.put(0: Byte)
  278. }
  279. buffer.put(0: Byte)
  280. buffer
  281. }
  282. private def closeSocket() {
  283. synchronized {
  284. socket.foreach { s =>
  285. try {
  286. s.close()
  287. } catch {
  288. case _: Throwable =>
  289. }
  290. }
  291. socket = None
  292. }
  293. }
  294. override def close() {
  295. stats.incrCloses()
  296. // TODO consider draining the flusher queue before returning
  297. // nothing stops a pending flush from opening a new socket
  298. closeSocket()
  299. flusher.shutdown()
  300. }
  301. override def publish(record: javalog.LogRecord) {
  302. if (record.getLoggerName == loggerName) return
  303. publish(getFormatter.format(record).getBytes("UTF-8"))
  304. }
  305. def publish(record: Array[Byte]) {
  306. stats.incrPublished()
  307. if (!queue.offer(record)) stats.incrDroppedRecords()
  308. if (Time.now.since(_lastTransmission) >= bufferTime) flush()
  309. }
  310. override def toString = {
  311. ("<%s level=%s hostname=%s port=%d scribe_buffer=%s " +
  312. "scribe_backoff=%s scribe_max_packet_size=%d formatter=%s>").format(getClass.getName, getLevel,
  313. hostname, port, bufferTime, connectBackoff, maxMessagesPerTransaction, formatter.toString)
  314. }
  315. private[this] val SCRIBE_PREFIX: Array[Byte] = Array[Byte](
  316. // version 1, call, "Log", reqid=0
  317. 0x80.toByte, 1, 0, 1, 0, 0, 0, 3, 'L'.toByte, 'o'.toByte, 'g'.toByte, 0, 0, 0, 0,
  318. // list of structs
  319. 15, 0, 1, 12
  320. )
  321. private[this] val OLD_SCRIBE_PREFIX: Array[Byte] = Array[Byte](
  322. // (no version), "Log", reply, reqid=0
  323. 0, 0, 0, 3, 'L'.toByte, 'o'.toByte, 'g'.toByte, 1, 0, 0, 0, 0,
  324. // list of structs
  325. 15, 0, 1, 12
  326. )
  327. private[this] val SCRIBE_REPLY: Array[Byte] = Array[Byte](
  328. // version 1, reply, "Log", reqid=0
  329. 0x80.toByte, 1, 0, 2, 0, 0, 0, 3, 'L'.toByte, 'o'.toByte, 'g'.toByte, 0, 0, 0, 0,
  330. // int, fid 0, 0=ok
  331. 8, 0, 0, 0, 0, 0, 0, 0
  332. )
  333. private[this] val OLD_SCRIBE_REPLY: Array[Byte] = Array[Byte](
  334. 0, 0, 0, 20,
  335. // (no version), "Log", reply, reqid=0
  336. 0, 0, 0, 3, 'L'.toByte, 'o'.toByte, 'g'.toByte, 2, 0, 0, 0, 0,
  337. // int, fid 0, 0=ok
  338. 8, 0, 0, 0, 0, 0, 0, 0
  339. )
  340. private class ScribeHandlerStats(statsReceiver: StatsReceiver) {
  341. private var _lastLogStats = Time.epoch
  342. private val sentRecords = new AtomicLong()
  343. private val droppedRecords = new AtomicLong()
  344. private val connectionFailure = new AtomicLong()
  345. private val connectionSkipped = new AtomicLong()
  346. val totalSentRecords = statsReceiver.counter("sent_records")
  347. val totalDroppedRecords = statsReceiver.counter("dropped_records")
  348. val totalConnectionFailure = statsReceiver.counter("connection_failed")
  349. val totalConnectionSkipped = statsReceiver.counter("connection_skipped")
  350. val totalConnects = statsReceiver.counter("connects")
  351. val totalPublished = statsReceiver.counter("published")
  352. val totalCloses = statsReceiver.counter("closes")
  353. val instances = statsReceiver.addGauge("instances") { 1 }
  354. val unsentQueue = statsReceiver.addGauge("unsent_queue") { queueSize }
  355. def incrSentRecords(count: Int): Unit = {
  356. sentRecords.addAndGet(count)
  357. totalSentRecords.incr(count)
  358. }
  359. def incrDroppedRecords(count: Int = 1): Unit = {
  360. droppedRecords.incrementAndGet()
  361. totalDroppedRecords.incr()
  362. }
  363. def incrConnectionFailure(): Unit = {
  364. connectionFailure.incrementAndGet()
  365. totalConnectionFailure.incr()
  366. }
  367. def incrConnectionSkipped(): Unit = {
  368. connectionSkipped.incrementAndGet()
  369. totalConnectionSkipped.incr()
  370. }
  371. def incrPublished(): Unit = totalPublished.incr()
  372. def incrConnection(): Unit = totalConnects.incr()
  373. def incrCloses(): Unit = totalCloses.incr()
  374. def log(): Unit = {
  375. synchronized {
  376. val period = Time.now.since(_lastLogStats)
  377. if (period > ScribeHandler.DefaultStatsReportPeriod) {
  378. val sent = sentRecords.getAndSet(0)
  379. val dropped = droppedRecords.getAndSet(0)
  380. val failed = connectionFailure.getAndSet(0)
  381. val skipped = connectionSkipped.getAndSet(0)
  382. ScribeHandler.log.info("sent records: %d, per second: %d, dropped records: %d, reconnection failures: %d, reconnection skipped: %d",
  383. sent, sent / period.inSeconds, dropped, failed, skipped)
  384. _lastLogStats = Time.now
  385. }
  386. }
  387. }
  388. }
  389. }