PageRenderTime 49ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/external/chromium/sdch/open-vcdiff/src/addrcache.cc

https://gitlab.com/brian0218/rk3188_rk3066_r-box_android4.4.2_sdk
C++ | 331 lines | 166 code | 15 blank | 150 comment | 32 complexity | 1c8985c427c1f4f1da2a384d1e338682 MD5 | raw file
  1. // Copyright 2007 Google Inc.
  2. // Author: Lincoln Smith
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  16. // Implementation of the Address Cache and Address Encoding
  17. // algorithms described in sections 5.1 - 5.4 of RFC 3284 -
  18. // The VCDIFF Generic Differencing and Compression Data Format.
  19. // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
  20. //
  21. // Assumptions:
  22. // * The VCDAddress type is large enough to hold any offset within
  23. // the source and target windows. The limit (for int32_t) is 2^31-1 bytes.
  24. // The source (dictionary) should not approach this size limit;
  25. // to compress a target file that is larger than
  26. // INT_MAX - (dictionary size) bytes, the encoder must
  27. // break it up into multiple target windows.
  28. #include <config.h>
  29. #include "addrcache.h"
  30. #include "logging.h"
  31. #include "varint_bigendian.h"
  32. #include "vcdiff_defs.h" // RESULT_ERROR
  33. namespace open_vcdiff {
  34. // The constructor does not initialize near_addresses_ and same_addresses_.
  35. // Therefore, Init() must be called before any other method can be used.
  36. //
  37. // Arguments:
  38. // near_cache_size: Size of the NEAR cache (number of 4-byte integers)
  39. // same_cache_size: Size of the SAME cache (number of blocks of
  40. // 256 4-byte integers per block)
  41. // Because the mode is expressed as a byte value,
  42. // near_cache_size + same_cache_size should not exceed 254.
  43. //
  44. VCDiffAddressCache::VCDiffAddressCache(int near_cache_size,
  45. int same_cache_size)
  46. : near_cache_size_(near_cache_size),
  47. same_cache_size_(same_cache_size),
  48. next_slot_(0) { }
  49. VCDiffAddressCache::VCDiffAddressCache()
  50. : near_cache_size_(kDefaultNearCacheSize),
  51. same_cache_size_(kDefaultSameCacheSize),
  52. next_slot_(0) { }
  53. // Sets up data structures needed to call other methods. Operations that may
  54. // fail at runtime (for example, validating the provided near_cache_size_ and
  55. // same_cache_size_ parameters against their maximum allowed values) are
  56. // confined to this routine in order to guarantee that the class constructor
  57. // will never fail. Other methods (except the destructor) cannot be invoked
  58. // until this method has been called successfully. After the object has been
  59. // initialized and used, Init() can be called again to reset it to its initial
  60. // state.
  61. //
  62. // Return value: "true" if initialization succeeded, "false" if it failed.
  63. // No other method except the destructor may be invoked if this function
  64. // returns false. The caller is responsible for checking the return value
  65. // and providing an exit path in case of error.
  66. //
  67. bool VCDiffAddressCache::Init() {
  68. // The mode is expressed as a byte value, so there is only room for 256 modes,
  69. // including the two non-cached modes (SELF and HERE). Do not allow a larger
  70. // number of modes to be defined. We do a separate sanity check for
  71. // near_cache_size_ and same_cache_size_ because adding them together can
  72. // cause an integer overflow if each is set to, say, INT_MAX.
  73. if ((near_cache_size_ > (VCD_MAX_MODES - 2)) || (near_cache_size_ < 0)) {
  74. LOG(ERROR) << "Near cache size " << near_cache_size_ << " is invalid"
  75. << LOG_ENDL;
  76. return false;
  77. }
  78. if ((same_cache_size_ > (VCD_MAX_MODES - 2)) || (same_cache_size_ < 0)) {
  79. LOG(ERROR) << "Same cache size " << same_cache_size_ << " is invalid"
  80. << LOG_ENDL;
  81. return false;
  82. }
  83. if ((near_cache_size_ + same_cache_size_) > VCD_MAX_MODES - 2) {
  84. LOG(ERROR) << "Using near cache size " << near_cache_size_
  85. << " and same cache size " << same_cache_size_
  86. << " would exceed maximum number of COPY modes ("
  87. << VCD_MAX_MODES << ")" << LOG_ENDL;
  88. return false;
  89. }
  90. if (near_cache_size_ > 0) {
  91. near_addresses_.assign(near_cache_size_, 0);
  92. }
  93. if (same_cache_size_ > 0) {
  94. same_addresses_.assign(same_cache_size_ * 256, 0);
  95. }
  96. next_slot_ = 0; // in case Init() is called a second time to reinit
  97. return true;
  98. }
  99. // This method will be called whenever an address is calculated for an
  100. // encoded or decoded COPY instruction, and will update the contents
  101. // of the SAME and NEAR caches. It is vital that the use of
  102. // UpdateCache (called cache_update in the RFC examples) exactly match
  103. // the RFC standard, and that the same caching logic be used in the
  104. // decoder as in the encoder, in order for the decoded addresses to
  105. // match.
  106. //
  107. // Argument:
  108. // address: This must be a valid address between 0 and
  109. // (source window size + target window size). It is assumed that
  110. // these bounds have been checked before calling UpdateCache.
  111. //
  112. void VCDiffAddressCache::UpdateCache(VCDAddress address) {
  113. if (near_cache_size_ > 0) {
  114. near_addresses_[next_slot_] = address;
  115. next_slot_ = (next_slot_ + 1) % near_cache_size_;
  116. }
  117. if (same_cache_size_ > 0) {
  118. same_addresses_[address % (same_cache_size_ * 256)] = address;
  119. }
  120. }
  121. // Determines the address mode that yields the most compact encoding
  122. // of the given address value, writes the encoded address into the
  123. // address stream, and returns the mode used. The most compact encoding
  124. // is found by looking for the numerically lowest encoded address.
  125. // The Init() function must already have been called.
  126. //
  127. // Arguments:
  128. // address: The address to be encoded. Must be a non-negative integer
  129. // between 0 and (here_address - 1).
  130. // here_address: The current location in the target data (i.e., the
  131. // position just after the last encoded value.) Must be non-negative.
  132. // encoded_addr: Points to an VCDAddress that will be replaced
  133. // with the encoded representation of address.
  134. // If WriteAddressAsVarintForMode returns true when passed
  135. // the return value, then encoded_addr should be written
  136. // into the delta file as a variable-length integer (Varint);
  137. // otherwise, it should be written as a byte (unsigned char).
  138. //
  139. // Return value: A mode value between 0 and 255. The mode will tell
  140. // how to interpret the next value in the address stream.
  141. // The values 0 and 1 correspond to SELF and HERE addressing.
  142. //
  143. // The function is guaranteed to succeed unless the conditions on the arguments
  144. // have not been met, in which case a LOG(DFATAL) message will be produced,
  145. // 0 will be returned, and *encoded_addr will be replaced with 0.
  146. //
  147. unsigned char VCDiffAddressCache::EncodeAddress(VCDAddress address,
  148. VCDAddress here_address,
  149. VCDAddress* encoded_addr) {
  150. if (address < 0) {
  151. LOG(DFATAL) << "EncodeAddress was passed a negative address: "
  152. << address << LOG_ENDL;
  153. *encoded_addr = 0;
  154. return 0;
  155. }
  156. if (address >= here_address) {
  157. LOG(DFATAL) << "EncodeAddress was called with address (" << address
  158. << ") < here_address (" << here_address << ")" << LOG_ENDL;
  159. *encoded_addr = 0;
  160. return 0;
  161. }
  162. // Try using the SAME cache. This method, if available, always
  163. // results in the smallest encoding and takes priority over other modes.
  164. if (same_cache_size() > 0) {
  165. const VCDAddress same_cache_pos =
  166. address % (same_cache_size() * 256);
  167. if (SameAddress(same_cache_pos) == address) {
  168. // This is the only mode for which an single byte will be written
  169. // to the address stream instead of a variable-length integer.
  170. UpdateCache(address);
  171. *encoded_addr = same_cache_pos % 256;
  172. return FirstSameMode() + (same_cache_pos / 256); // SAME mode
  173. }
  174. }
  175. // Try SELF mode
  176. unsigned char best_mode = VCD_SELF_MODE;
  177. VCDAddress best_encoded_address = address;
  178. // Try HERE mode
  179. {
  180. const VCDAddress here_encoded_address = here_address - address;
  181. if (here_encoded_address < best_encoded_address) {
  182. best_mode = VCD_HERE_MODE;
  183. best_encoded_address = here_encoded_address;
  184. }
  185. }
  186. // Try using the NEAR cache
  187. for (int i = 0; i < near_cache_size(); ++i) {
  188. const VCDAddress near_encoded_address = address - NearAddress(i);
  189. if ((near_encoded_address >= 0) &&
  190. (near_encoded_address < best_encoded_address)) {
  191. best_mode = FirstNearMode() + i;
  192. best_encoded_address = near_encoded_address;
  193. }
  194. }
  195. UpdateCache(address);
  196. *encoded_addr = best_encoded_address;
  197. return best_mode;
  198. }
  199. // Increments *byte_pointer and returns the byte it pointed to before the
  200. // increment. The caller must check bounds to ensure that *byte_pointer
  201. // points to a valid address in memory.
  202. static unsigned char ParseByte(const char** byte_pointer) {
  203. unsigned char byte_value = static_cast<unsigned char>(**byte_pointer);
  204. ++(*byte_pointer);
  205. return byte_value;
  206. }
  207. // Checks the given decoded address for validity. Returns true if the
  208. // address is valid; otherwise, prints an error message to the log and
  209. // returns false.
  210. static bool IsDecodedAddressValid(VCDAddress decoded_address,
  211. VCDAddress here_address) {
  212. if (decoded_address < 0) {
  213. LOG(ERROR) << "Decoded address " << decoded_address << " is invalid"
  214. << LOG_ENDL;
  215. return false;
  216. } else if (decoded_address >= here_address) {
  217. LOG(ERROR) << "Decoded address (" << decoded_address
  218. << ") is beyond location in target file (" << here_address
  219. << ")" << LOG_ENDL;
  220. return false;
  221. }
  222. return true;
  223. }
  224. // Interprets the next value in the address_stream using the provided mode,
  225. // which may need to access the SAME or NEAR address cache. Returns the
  226. // decoded address.
  227. // The Init() function must already have been called.
  228. //
  229. // Arguments:
  230. // here_address: The current location in the source + target data (i.e., the
  231. // location into which the COPY instruction will copy.) By definition,
  232. // all addresses between 0 and (here_address - 1) are valid, and
  233. // any other address is invalid.
  234. // mode: A byte value between 0 and (near_cache_size_ + same_cache_size_ + 1)
  235. // which tells how to interpret the next value in the address stream.
  236. // The values 0 and 1 correspond to SELF and HERE addressing.
  237. // The validity of "mode" should already have been checked before
  238. // calling this function.
  239. // address_stream: Points to a pointer holding the position
  240. // in the "Addresses section for COPYs" part of the input data.
  241. // That section must already have been uncompressed
  242. // using a secondary decompressor (if necessary.)
  243. // This is an IN/OUT argument; the value of *address_stream will be
  244. // incremented by the size of an integer, or (if the SAME cache
  245. // was used) by the size of a byte (1).
  246. // address_stream_end: Points to the position just after the end of
  247. // the address stream buffer. All addresses between *address_stream
  248. // and address_stream_end should contain valid address data.
  249. //
  250. // Return value: If the input conditions were met, and the address section
  251. // of the input data contains properly encoded addresses that match
  252. // the instructions section, then an integer between 0 and here_address - 1
  253. // will be returned, representing the address from which data should
  254. // be copied from the source or target window into the output stream.
  255. // If an invalid address value is found in address_stream, then
  256. // RESULT_ERROR will be returned. If the limit address_stream_end
  257. // is reached before the address can be decoded, then
  258. // RESULT_END_OF_DATA will be returned. If more streamed data
  259. // is expected, this means that the consumer should block and wait
  260. // for more data before continuing to decode. If no more data is expected,
  261. // this return value signals an error condition.
  262. //
  263. VCDAddress VCDiffAddressCache::DecodeAddress(VCDAddress here_address,
  264. unsigned char mode,
  265. const char** address_stream,
  266. const char* address_stream_end) {
  267. if (here_address < 0) {
  268. LOG(DFATAL) << "DecodeAddress was passed a negative value"
  269. " for here_address: " << here_address << LOG_ENDL;
  270. return RESULT_ERROR;
  271. }
  272. const char* new_address_pos = *address_stream;
  273. if (new_address_pos >= address_stream_end) {
  274. return RESULT_END_OF_DATA;
  275. }
  276. VCDAddress decoded_address;
  277. if (IsSameMode(mode)) {
  278. // SAME mode expects a byte value as the encoded address
  279. unsigned char encoded_address = ParseByte(&new_address_pos);
  280. decoded_address = DecodeSameAddress(mode, encoded_address);
  281. } else {
  282. // All modes except SAME mode expect a VarintBE as the encoded address
  283. int32_t encoded_address = VarintBE<int32_t>::Parse(address_stream_end,
  284. &new_address_pos);
  285. switch (encoded_address) {
  286. case RESULT_ERROR:
  287. LOG(ERROR) << "Found invalid variable-length integer "
  288. "as encoded address value" << LOG_ENDL;
  289. return RESULT_ERROR;
  290. case RESULT_END_OF_DATA:
  291. return RESULT_END_OF_DATA;
  292. default:
  293. break;
  294. }
  295. if (IsSelfMode(mode)) {
  296. decoded_address = DecodeSelfAddress(encoded_address);
  297. } else if (IsHereMode(mode)) {
  298. decoded_address = DecodeHereAddress(encoded_address, here_address);
  299. } else if (IsNearMode(mode)) {
  300. decoded_address = DecodeNearAddress(mode, encoded_address);
  301. } else {
  302. LOG(DFATAL) << "Invalid mode value (" << static_cast<int>(mode)
  303. << ") passed to DecodeAddress; maximum mode value = "
  304. << static_cast<int>(LastMode()) << LOG_ENDL;
  305. return RESULT_ERROR;
  306. }
  307. }
  308. // Check for an out-of-bounds address (corrupt/malicious data)
  309. if (!IsDecodedAddressValid(decoded_address, here_address)) {
  310. return RESULT_ERROR;
  311. }
  312. *address_stream = new_address_pos;
  313. UpdateCache(decoded_address);
  314. return decoded_address;
  315. }
  316. } // namespace open_vcdiff