PageRenderTime 1892ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 0ms

/third_party/gofrontend/libgo/go/compress/flate/huffman_bit_writer.go

http://github.com/axw/llgo
Go | 517 lines | 413 code | 40 blank | 64 comment | 86 complexity | 627d9f1a641763732a162077df7ea776 MD5 | raw file
Possible License(s): BSD-3-Clause, MIT
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package flate
import (
	"io"
	"math"
	"strconv"
)
  9. const (
  10. // The largest offset code.
  11. offsetCodeCount = 30
  12. // The special code used to mark the end of a block.
  13. endBlockMarker = 256
  14. // The first length code.
  15. lengthCodesStart = 257
  16. // The number of codegen codes.
  17. codegenCodeCount = 19
  18. badCode = 255
  19. )
  20. // The number of extra bits needed by length code X - LENGTH_CODES_START.
  21. var lengthExtraBits = []int8{
  22. /* 257 */ 0, 0, 0,
  23. /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
  24. /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
  25. /* 280 */ 4, 5, 5, 5, 5, 0,
  26. }
  27. // The length indicated by length code X - LENGTH_CODES_START.
  28. var lengthBase = []uint32{
  29. 0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
  30. 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
  31. 64, 80, 96, 112, 128, 160, 192, 224, 255,
  32. }
  33. // offset code word extra bits.
  34. var offsetExtraBits = []int8{
  35. 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
  36. 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
  37. 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
  38. /* extended window */
  39. 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
  40. }
  41. var offsetBase = []uint32{
  42. /* normal deflate */
  43. 0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
  44. 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
  45. 0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
  46. 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
  47. 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
  48. 0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
  49. /* extended window */
  50. 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
  51. 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
  52. 0x100000, 0x180000, 0x200000, 0x300000,
  53. }
  54. // The odd order in which the codegen code sizes are written.
  55. var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
  56. type huffmanBitWriter struct {
  57. w io.Writer
  58. // Data waiting to be written is bytes[0:nbytes]
  59. // and then the low nbits of bits.
  60. bits uint32
  61. nbits uint32
  62. bytes [64]byte
  63. nbytes int
  64. literalFreq []int32
  65. offsetFreq []int32
  66. codegen []uint8
  67. codegenFreq []int32
  68. literalEncoding *huffmanEncoder
  69. offsetEncoding *huffmanEncoder
  70. codegenEncoding *huffmanEncoder
  71. err error
  72. }
  73. func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
  74. return &huffmanBitWriter{
  75. w: w,
  76. literalFreq: make([]int32, maxNumLit),
  77. offsetFreq: make([]int32, offsetCodeCount),
  78. codegen: make([]uint8, maxNumLit+offsetCodeCount+1),
  79. codegenFreq: make([]int32, codegenCodeCount),
  80. literalEncoding: newHuffmanEncoder(maxNumLit),
  81. offsetEncoding: newHuffmanEncoder(offsetCodeCount),
  82. codegenEncoding: newHuffmanEncoder(codegenCodeCount),
  83. }
  84. }
  85. func (w *huffmanBitWriter) reset(writer io.Writer) {
  86. w.w = writer
  87. w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
  88. w.bytes = [64]byte{}
  89. for i := range w.codegen {
  90. w.codegen[i] = 0
  91. }
  92. for _, s := range [...][]int32{w.literalFreq, w.offsetFreq, w.codegenFreq} {
  93. for i := range s {
  94. s[i] = 0
  95. }
  96. }
  97. for _, enc := range [...]*huffmanEncoder{
  98. w.literalEncoding,
  99. w.offsetEncoding,
  100. w.codegenEncoding} {
  101. for i := range enc.code {
  102. enc.code[i] = 0
  103. }
  104. for i := range enc.codeBits {
  105. enc.codeBits[i] = 0
  106. }
  107. }
  108. }
  109. func (w *huffmanBitWriter) flushBits() {
  110. if w.err != nil {
  111. w.nbits = 0
  112. return
  113. }
  114. bits := w.bits
  115. w.bits >>= 16
  116. w.nbits -= 16
  117. n := w.nbytes
  118. w.bytes[n] = byte(bits)
  119. w.bytes[n+1] = byte(bits >> 8)
  120. if n += 2; n >= len(w.bytes) {
  121. _, w.err = w.w.Write(w.bytes[0:])
  122. n = 0
  123. }
  124. w.nbytes = n
  125. }
  126. func (w *huffmanBitWriter) flush() {
  127. if w.err != nil {
  128. w.nbits = 0
  129. return
  130. }
  131. n := w.nbytes
  132. if w.nbits > 8 {
  133. w.bytes[n] = byte(w.bits)
  134. w.bits >>= 8
  135. w.nbits -= 8
  136. n++
  137. }
  138. if w.nbits > 0 {
  139. w.bytes[n] = byte(w.bits)
  140. w.nbits = 0
  141. n++
  142. }
  143. w.bits = 0
  144. _, w.err = w.w.Write(w.bytes[0:n])
  145. w.nbytes = 0
  146. }
  147. func (w *huffmanBitWriter) writeBits(b, nb int32) {
  148. w.bits |= uint32(b) << w.nbits
  149. if w.nbits += uint32(nb); w.nbits >= 16 {
  150. w.flushBits()
  151. }
  152. }
  153. func (w *huffmanBitWriter) writeBytes(bytes []byte) {
  154. if w.err != nil {
  155. return
  156. }
  157. n := w.nbytes
  158. if w.nbits == 8 {
  159. w.bytes[n] = byte(w.bits)
  160. w.nbits = 0
  161. n++
  162. }
  163. if w.nbits != 0 {
  164. w.err = InternalError("writeBytes with unfinished bits")
  165. return
  166. }
  167. if n != 0 {
  168. _, w.err = w.w.Write(w.bytes[0:n])
  169. if w.err != nil {
  170. return
  171. }
  172. }
  173. w.nbytes = 0
  174. _, w.err = w.w.Write(bytes)
  175. }
  176. // RFC 1951 3.2.7 specifies a special run-length encoding for specifying
  177. // the literal and offset lengths arrays (which are concatenated into a single
  178. // array). This method generates that run-length encoding.
  179. //
  180. // The result is written into the codegen array, and the frequencies
  181. // of each code is written into the codegenFreq array.
  182. // Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
  183. // information. Code badCode is an end marker
  184. //
  185. // numLiterals The number of literals in literalEncoding
  186. // numOffsets The number of offsets in offsetEncoding
  187. func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int) {
  188. for i := range w.codegenFreq {
  189. w.codegenFreq[i] = 0
  190. }
  191. // Note that we are using codegen both as a temporary variable for holding
  192. // a copy of the frequencies, and as the place where we put the result.
  193. // This is fine because the output is always shorter than the input used
  194. // so far.
  195. codegen := w.codegen // cache
  196. // Copy the concatenated code sizes to codegen. Put a marker at the end.
  197. copy(codegen[0:numLiterals], w.literalEncoding.codeBits)
  198. copy(codegen[numLiterals:numLiterals+numOffsets], w.offsetEncoding.codeBits)
  199. codegen[numLiterals+numOffsets] = badCode
  200. size := codegen[0]
  201. count := 1
  202. outIndex := 0
  203. for inIndex := 1; size != badCode; inIndex++ {
  204. // INVARIANT: We have seen "count" copies of size that have not yet
  205. // had output generated for them.
  206. nextSize := codegen[inIndex]
  207. if nextSize == size {
  208. count++
  209. continue
  210. }
  211. // We need to generate codegen indicating "count" of size.
  212. if size != 0 {
  213. codegen[outIndex] = size
  214. outIndex++
  215. w.codegenFreq[size]++
  216. count--
  217. for count >= 3 {
  218. n := 6
  219. if n > count {
  220. n = count
  221. }
  222. codegen[outIndex] = 16
  223. outIndex++
  224. codegen[outIndex] = uint8(n - 3)
  225. outIndex++
  226. w.codegenFreq[16]++
  227. count -= n
  228. }
  229. } else {
  230. for count >= 11 {
  231. n := 138
  232. if n > count {
  233. n = count
  234. }
  235. codegen[outIndex] = 18
  236. outIndex++
  237. codegen[outIndex] = uint8(n - 11)
  238. outIndex++
  239. w.codegenFreq[18]++
  240. count -= n
  241. }
  242. if count >= 3 {
  243. // count >= 3 && count <= 10
  244. codegen[outIndex] = 17
  245. outIndex++
  246. codegen[outIndex] = uint8(count - 3)
  247. outIndex++
  248. w.codegenFreq[17]++
  249. count = 0
  250. }
  251. }
  252. count--
  253. for ; count >= 0; count-- {
  254. codegen[outIndex] = size
  255. outIndex++
  256. w.codegenFreq[size]++
  257. }
  258. // Set up invariant for next time through the loop.
  259. size = nextSize
  260. count = 1
  261. }
  262. // Marker indicating the end of the codegen.
  263. codegen[outIndex] = badCode
  264. }
  265. func (w *huffmanBitWriter) writeCode(code *huffmanEncoder, literal uint32) {
  266. if w.err != nil {
  267. return
  268. }
  269. w.writeBits(int32(code.code[literal]), int32(code.codeBits[literal]))
  270. }
  271. // Write the header of a dynamic Huffman block to the output stream.
  272. //
  273. // numLiterals The number of literals specified in codegen
  274. // numOffsets The number of offsets specified in codegen
  275. // numCodegens The number of codegens used in codegen
  276. func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
  277. if w.err != nil {
  278. return
  279. }
  280. var firstBits int32 = 4
  281. if isEof {
  282. firstBits = 5
  283. }
  284. w.writeBits(firstBits, 3)
  285. w.writeBits(int32(numLiterals-257), 5)
  286. w.writeBits(int32(numOffsets-1), 5)
  287. w.writeBits(int32(numCodegens-4), 4)
  288. for i := 0; i < numCodegens; i++ {
  289. value := w.codegenEncoding.codeBits[codegenOrder[i]]
  290. w.writeBits(int32(value), 3)
  291. }
  292. i := 0
  293. for {
  294. var codeWord int = int(w.codegen[i])
  295. i++
  296. if codeWord == badCode {
  297. break
  298. }
  299. // The low byte contains the actual code to generate.
  300. w.writeCode(w.codegenEncoding, uint32(codeWord))
  301. switch codeWord {
  302. case 16:
  303. w.writeBits(int32(w.codegen[i]), 2)
  304. i++
  305. break
  306. case 17:
  307. w.writeBits(int32(w.codegen[i]), 3)
  308. i++
  309. break
  310. case 18:
  311. w.writeBits(int32(w.codegen[i]), 7)
  312. i++
  313. break
  314. }
  315. }
  316. }
  317. func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
  318. if w.err != nil {
  319. return
  320. }
  321. var flag int32
  322. if isEof {
  323. flag = 1
  324. }
  325. w.writeBits(flag, 3)
  326. w.flush()
  327. w.writeBits(int32(length), 16)
  328. w.writeBits(int32(^uint16(length)), 16)
  329. }
  330. func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
  331. if w.err != nil {
  332. return
  333. }
  334. // Indicate that we are a fixed Huffman block
  335. var value int32 = 2
  336. if isEof {
  337. value = 3
  338. }
  339. w.writeBits(value, 3)
  340. }
  341. func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
  342. if w.err != nil {
  343. return
  344. }
  345. for i := range w.literalFreq {
  346. w.literalFreq[i] = 0
  347. }
  348. for i := range w.offsetFreq {
  349. w.offsetFreq[i] = 0
  350. }
  351. n := len(tokens)
  352. tokens = tokens[0 : n+1]
  353. tokens[n] = endBlockMarker
  354. for _, t := range tokens {
  355. switch t.typ() {
  356. case literalType:
  357. w.literalFreq[t.literal()]++
  358. case matchType:
  359. length := t.length()
  360. offset := t.offset()
  361. w.literalFreq[lengthCodesStart+lengthCode(length)]++
  362. w.offsetFreq[offsetCode(offset)]++
  363. }
  364. }
  365. // get the number of literals
  366. numLiterals := len(w.literalFreq)
  367. for w.literalFreq[numLiterals-1] == 0 {
  368. numLiterals--
  369. }
  370. // get the number of offsets
  371. numOffsets := len(w.offsetFreq)
  372. for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
  373. numOffsets--
  374. }
  375. if numOffsets == 0 {
  376. // We haven't found a single match. If we want to go with the dynamic encoding,
  377. // we should count at least one offset to be sure that the offset huffman tree could be encoded.
  378. w.offsetFreq[0] = 1
  379. numOffsets = 1
  380. }
  381. w.literalEncoding.generate(w.literalFreq, 15)
  382. w.offsetEncoding.generate(w.offsetFreq, 15)
  383. storedBytes := 0
  384. if input != nil {
  385. storedBytes = len(input)
  386. }
  387. var extraBits int64
  388. var storedSize int64 = math.MaxInt64
  389. if storedBytes <= maxStoreBlockSize && input != nil {
  390. storedSize = int64((storedBytes + 5) * 8)
  391. // We only bother calculating the costs of the extra bits required by
  392. // the length of offset fields (which will be the same for both fixed
  393. // and dynamic encoding), if we need to compare those two encodings
  394. // against stored encoding.
  395. for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
  396. // First eight length codes have extra size = 0.
  397. extraBits += int64(w.literalFreq[lengthCode]) * int64(lengthExtraBits[lengthCode-lengthCodesStart])
  398. }
  399. for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
  400. // First four offset codes have extra size = 0.
  401. extraBits += int64(w.offsetFreq[offsetCode]) * int64(offsetExtraBits[offsetCode])
  402. }
  403. }
  404. // Figure out smallest code.
  405. // Fixed Huffman baseline.
  406. var size = int64(3) +
  407. fixedLiteralEncoding.bitLength(w.literalFreq) +
  408. fixedOffsetEncoding.bitLength(w.offsetFreq) +
  409. extraBits
  410. var literalEncoding = fixedLiteralEncoding
  411. var offsetEncoding = fixedOffsetEncoding
  412. // Dynamic Huffman?
  413. var numCodegens int
  414. // Generate codegen and codegenFrequencies, which indicates how to encode
  415. // the literalEncoding and the offsetEncoding.
  416. w.generateCodegen(numLiterals, numOffsets)
  417. w.codegenEncoding.generate(w.codegenFreq, 7)
  418. numCodegens = len(w.codegenFreq)
  419. for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
  420. numCodegens--
  421. }
  422. dynamicHeader := int64(3+5+5+4+(3*numCodegens)) +
  423. w.codegenEncoding.bitLength(w.codegenFreq) +
  424. int64(extraBits) +
  425. int64(w.codegenFreq[16]*2) +
  426. int64(w.codegenFreq[17]*3) +
  427. int64(w.codegenFreq[18]*7)
  428. dynamicSize := dynamicHeader +
  429. w.literalEncoding.bitLength(w.literalFreq) +
  430. w.offsetEncoding.bitLength(w.offsetFreq)
  431. if dynamicSize < size {
  432. size = dynamicSize
  433. literalEncoding = w.literalEncoding
  434. offsetEncoding = w.offsetEncoding
  435. }
  436. // Stored bytes?
  437. if storedSize < size {
  438. w.writeStoredHeader(storedBytes, eof)
  439. w.writeBytes(input[0:storedBytes])
  440. return
  441. }
  442. // Huffman.
  443. if literalEncoding == fixedLiteralEncoding {
  444. w.writeFixedHeader(eof)
  445. } else {
  446. w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
  447. }
  448. for _, t := range tokens {
  449. switch t.typ() {
  450. case literalType:
  451. w.writeCode(literalEncoding, t.literal())
  452. break
  453. case matchType:
  454. // Write the length
  455. length := t.length()
  456. lengthCode := lengthCode(length)
  457. w.writeCode(literalEncoding, lengthCode+lengthCodesStart)
  458. extraLengthBits := int32(lengthExtraBits[lengthCode])
  459. if extraLengthBits > 0 {
  460. extraLength := int32(length - lengthBase[lengthCode])
  461. w.writeBits(extraLength, extraLengthBits)
  462. }
  463. // Write the offset
  464. offset := t.offset()
  465. offsetCode := offsetCode(offset)
  466. w.writeCode(offsetEncoding, offsetCode)
  467. extraOffsetBits := int32(offsetExtraBits[offsetCode])
  468. if extraOffsetBits > 0 {
  469. extraOffset := int32(offset - offsetBase[offsetCode])
  470. w.writeBits(extraOffset, extraOffsetBits)
  471. }
  472. break
  473. default:
  474. panic("unknown token type: " + string(t))
  475. }
  476. }
  477. }