PageRenderTime 49ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/third_party/protobuf/java/core/src/test/java/com/google/protobuf/DecodeUtf8Test.java

https://github.com/chromium/chromium
Java | 325 lines | 262 code | 32 blank | 31 comment | 24 complexity | eda5d45b3c728f720133049b26a29dee MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, Apache-2.0, BSD-3-Clause
  1. package com.google.protobuf;
  2. import com.google.protobuf.Utf8.Processor;
  3. import com.google.protobuf.Utf8.SafeProcessor;
  4. import com.google.protobuf.Utf8.UnsafeProcessor;
  5. import java.nio.ByteBuffer;
  6. import java.util.ArrayList;
  7. import java.util.List;
  8. import java.util.logging.Logger;
  9. import junit.framework.TestCase;
  10. public class DecodeUtf8Test extends TestCase {
  11. private static Logger logger = Logger.getLogger(DecodeUtf8Test.class.getName());
  12. private static final Processor SAFE_PROCESSOR = new SafeProcessor();
  13. private static final Processor UNSAFE_PROCESSOR = new UnsafeProcessor();
  14. public void testRoundTripAllValidChars() throws Exception {
  15. for (int i = Character.MIN_CODE_POINT; i < Character.MAX_CODE_POINT; i++) {
  16. if (i < Character.MIN_SURROGATE || i > Character.MAX_SURROGATE) {
  17. String str = new String(Character.toChars(i));
  18. assertRoundTrips(str);
  19. }
  20. }
  21. }
  22. // Test all 1, 2, 3 invalid byte combinations. Valid ones would have been covered above.
  23. public void testOneByte() throws Exception {
  24. int valid = 0;
  25. for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
  26. ByteString bs = ByteString.copyFrom(new byte[] { (byte) i });
  27. if (!bs.isValidUtf8()) {
  28. assertInvalid(bs.toByteArray());
  29. } else {
  30. valid++;
  31. }
  32. }
  33. assertEquals(IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, valid);
  34. }
  35. public void testTwoBytes() throws Exception {
  36. int valid = 0;
  37. for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
  38. for (int j = Byte.MIN_VALUE; j <= Byte.MAX_VALUE; j++) {
  39. ByteString bs = ByteString.copyFrom(new byte[]{(byte) i, (byte) j});
  40. if (!bs.isValidUtf8()) {
  41. assertInvalid(bs.toByteArray());
  42. } else {
  43. valid++;
  44. }
  45. }
  46. }
  47. assertEquals(IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT, valid);
  48. }
  49. public void testThreeBytes() throws Exception {
  50. // Travis' OOM killer doesn't like this test
  51. if (System.getenv("TRAVIS") == null) {
  52. int count = 0;
  53. int valid = 0;
  54. for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
  55. for (int j = Byte.MIN_VALUE; j <= Byte.MAX_VALUE; j++) {
  56. for (int k = Byte.MIN_VALUE; k <= Byte.MAX_VALUE; k++) {
  57. byte[] bytes = new byte[]{(byte) i, (byte) j, (byte) k};
  58. ByteString bs = ByteString.copyFrom(bytes);
  59. if (!bs.isValidUtf8()) {
  60. assertInvalid(bytes);
  61. } else {
  62. valid++;
  63. }
  64. count++;
  65. if (count % 1000000L == 0) {
  66. logger.info("Processed " + (count / 1000000L) + " million characters");
  67. }
  68. }
  69. }
  70. }
  71. assertEquals(IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT, valid);
  72. }
  73. }
  74. /**
  75. * Tests that round tripping of a sample of four byte permutations work.
  76. */
  77. public void testInvalid_4BytesSamples() throws Exception {
  78. // Bad trailing bytes
  79. assertInvalid(0xF0, 0xA4, 0xAD, 0x7F);
  80. assertInvalid(0xF0, 0xA4, 0xAD, 0xC0);
  81. // Special cases for byte2
  82. assertInvalid(0xF0, 0x8F, 0xAD, 0xA2);
  83. assertInvalid(0xF4, 0x90, 0xAD, 0xA2);
  84. }
  85. public void testRealStrings() throws Exception {
  86. // English
  87. assertRoundTrips("The quick brown fox jumps over the lazy dog");
  88. // German
  89. assertRoundTrips("Quizdeltagerne spiste jordb\u00e6r med fl\u00f8de, mens cirkusklovnen");
  90. // Japanese
  91. assertRoundTrips(
  92. "\u3044\u308d\u306f\u306b\u307b\u3078\u3068\u3061\u308a\u306c\u308b\u3092");
  93. // Hebrew
  94. assertRoundTrips(
  95. "\u05d3\u05d2 \u05e1\u05e7\u05e8\u05df \u05e9\u05d8 \u05d1\u05d9\u05dd "
  96. + "\u05de\u05d0\u05d5\u05db\u05d6\u05d1 \u05d5\u05dc\u05e4\u05ea\u05e2"
  97. + " \u05de\u05e6\u05d0 \u05dc\u05d5 \u05d7\u05d1\u05e8\u05d4 "
  98. + "\u05d0\u05d9\u05da \u05d4\u05e7\u05dc\u05d9\u05d8\u05d4");
  99. // Thai
  100. assertRoundTrips(
  101. " \u0e08\u0e07\u0e1d\u0e48\u0e32\u0e1f\u0e31\u0e19\u0e1e\u0e31\u0e12"
  102. + "\u0e19\u0e32\u0e27\u0e34\u0e0a\u0e32\u0e01\u0e32\u0e23");
  103. // Chinese
  104. assertRoundTrips(
  105. "\u8fd4\u56de\u94fe\u4e2d\u7684\u4e0b\u4e00\u4e2a\u4ee3\u7406\u9879\u9009\u62e9\u5668");
  106. // Chinese with 4-byte chars
  107. assertRoundTrips("\uD841\uDF0E\uD841\uDF31\uD841\uDF79\uD843\uDC53\uD843\uDC78"
  108. + "\uD843\uDC96\uD843\uDCCF\uD843\uDCD5\uD843\uDD15\uD843\uDD7C\uD843\uDD7F"
  109. + "\uD843\uDE0E\uD843\uDE0F\uD843\uDE77\uD843\uDE9D\uD843\uDEA2");
  110. // Mixed
  111. assertRoundTrips(
  112. "The quick brown \u3044\u308d\u306f\u306b\u307b\u3078\u8fd4\u56de\u94fe"
  113. + "\u4e2d\u7684\u4e0b\u4e00");
  114. }
  115. public void testOverlong() throws Exception {
  116. assertInvalid(0xc0, 0xaf);
  117. assertInvalid(0xe0, 0x80, 0xaf);
  118. assertInvalid(0xf0, 0x80, 0x80, 0xaf);
  119. // Max overlong
  120. assertInvalid(0xc1, 0xbf);
  121. assertInvalid(0xe0, 0x9f, 0xbf);
  122. assertInvalid(0xf0 ,0x8f, 0xbf, 0xbf);
  123. // null overlong
  124. assertInvalid(0xc0, 0x80);
  125. assertInvalid(0xe0, 0x80, 0x80);
  126. assertInvalid(0xf0, 0x80, 0x80, 0x80);
  127. }
  128. public void testIllegalCodepoints() throws Exception {
  129. // Single surrogate
  130. assertInvalid(0xed, 0xa0, 0x80);
  131. assertInvalid(0xed, 0xad, 0xbf);
  132. assertInvalid(0xed, 0xae, 0x80);
  133. assertInvalid(0xed, 0xaf, 0xbf);
  134. assertInvalid(0xed, 0xb0, 0x80);
  135. assertInvalid(0xed, 0xbe, 0x80);
  136. assertInvalid(0xed, 0xbf, 0xbf);
  137. // Paired surrogates
  138. assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80);
  139. assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf);
  140. assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80);
  141. assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf);
  142. assertInvalid(0xed, 0xae, 0x80, 0xed, 0xb0, 0x80);
  143. assertInvalid(0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf);
  144. assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80);
  145. assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf);
  146. }
  147. public void testBufferSlice() throws Exception {
  148. String str = "The quick brown fox jumps over the lazy dog";
  149. assertRoundTrips(str, 10, 4);
  150. assertRoundTrips(str, str.length(), 0);
  151. }
  152. public void testInvalidBufferSlice() throws Exception {
  153. byte[] bytes = "The quick brown fox jumps over the lazy dog".getBytes(Internal.UTF_8);
  154. assertInvalidSlice(bytes, bytes.length - 3, 4);
  155. assertInvalidSlice(bytes, bytes.length, 1);
  156. assertInvalidSlice(bytes, bytes.length + 1, 0);
  157. assertInvalidSlice(bytes, 0, bytes.length + 1);
  158. }
  159. private void assertInvalid(int... bytesAsInt) throws Exception {
  160. byte[] bytes = new byte[bytesAsInt.length];
  161. for (int i = 0; i < bytesAsInt.length; i++) {
  162. bytes[i] = (byte) bytesAsInt[i];
  163. }
  164. assertInvalid(bytes);
  165. }
  166. private void assertInvalid(byte[] bytes) throws Exception {
  167. try {
  168. UNSAFE_PROCESSOR.decodeUtf8(bytes, 0, bytes.length);
  169. fail();
  170. } catch (InvalidProtocolBufferException e) {
  171. // Expected.
  172. }
  173. try {
  174. SAFE_PROCESSOR.decodeUtf8(bytes, 0, bytes.length);
  175. fail();
  176. } catch (InvalidProtocolBufferException e) {
  177. // Expected.
  178. }
  179. ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
  180. direct.put(bytes);
  181. direct.flip();
  182. try {
  183. UNSAFE_PROCESSOR.decodeUtf8(direct, 0, bytes.length);
  184. fail();
  185. } catch (InvalidProtocolBufferException e) {
  186. // Expected.
  187. }
  188. try {
  189. SAFE_PROCESSOR.decodeUtf8(direct, 0, bytes.length);
  190. fail();
  191. } catch (InvalidProtocolBufferException e) {
  192. // Expected.
  193. }
  194. ByteBuffer heap = ByteBuffer.allocate(bytes.length);
  195. heap.put(bytes);
  196. heap.flip();
  197. try {
  198. UNSAFE_PROCESSOR.decodeUtf8(heap, 0, bytes.length);
  199. fail();
  200. } catch (InvalidProtocolBufferException e) {
  201. // Expected.
  202. }
  203. try {
  204. SAFE_PROCESSOR.decodeUtf8(heap, 0, bytes.length);
  205. fail();
  206. } catch (InvalidProtocolBufferException e) {
  207. // Expected.
  208. }
  209. }
  210. private void assertInvalidSlice(byte[] bytes, int index, int size) throws Exception {
  211. try {
  212. UNSAFE_PROCESSOR.decodeUtf8(bytes, index, size);
  213. fail();
  214. } catch (ArrayIndexOutOfBoundsException e) {
  215. // Expected.
  216. }
  217. try {
  218. SAFE_PROCESSOR.decodeUtf8(bytes, index, size);
  219. fail();
  220. } catch (ArrayIndexOutOfBoundsException e) {
  221. // Expected.
  222. }
  223. ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
  224. direct.put(bytes);
  225. direct.flip();
  226. try {
  227. UNSAFE_PROCESSOR.decodeUtf8(direct, index, size);
  228. fail();
  229. } catch (ArrayIndexOutOfBoundsException e) {
  230. // Expected.
  231. }
  232. try {
  233. SAFE_PROCESSOR.decodeUtf8(direct, index, size);
  234. fail();
  235. } catch (ArrayIndexOutOfBoundsException e) {
  236. // Expected.
  237. }
  238. ByteBuffer heap = ByteBuffer.allocate(bytes.length);
  239. heap.put(bytes);
  240. heap.flip();
  241. try {
  242. UNSAFE_PROCESSOR.decodeUtf8(heap, index, size);
  243. fail();
  244. } catch (ArrayIndexOutOfBoundsException e) {
  245. // Expected.
  246. }
  247. try {
  248. SAFE_PROCESSOR.decodeUtf8(heap, index, size);
  249. fail();
  250. } catch (ArrayIndexOutOfBoundsException e) {
  251. // Expected.
  252. }
  253. }
  254. private void assertRoundTrips(String str) throws Exception {
  255. assertRoundTrips(str, 0, -1);
  256. }
  257. private void assertRoundTrips(String str, int index, int size) throws Exception {
  258. byte[] bytes = str.getBytes(Internal.UTF_8);
  259. if (size == -1) {
  260. size = bytes.length;
  261. }
  262. assertDecode(new String(bytes, index, size, Internal.UTF_8),
  263. UNSAFE_PROCESSOR.decodeUtf8(bytes, index, size));
  264. assertDecode(new String(bytes, index, size, Internal.UTF_8),
  265. SAFE_PROCESSOR.decodeUtf8(bytes, index, size));
  266. ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
  267. direct.put(bytes);
  268. direct.flip();
  269. assertDecode(new String(bytes, index, size, Internal.UTF_8),
  270. UNSAFE_PROCESSOR.decodeUtf8(direct, index, size));
  271. assertDecode(new String(bytes, index, size, Internal.UTF_8),
  272. SAFE_PROCESSOR.decodeUtf8(direct, index, size));
  273. ByteBuffer heap = ByteBuffer.allocate(bytes.length);
  274. heap.put(bytes);
  275. heap.flip();
  276. assertDecode(new String(bytes, index, size, Internal.UTF_8),
  277. UNSAFE_PROCESSOR.decodeUtf8(heap, index, size));
  278. assertDecode(new String(bytes, index, size, Internal.UTF_8),
  279. SAFE_PROCESSOR.decodeUtf8(heap, index, size));
  280. }
  281. private void assertDecode(String expected, String actual) {
  282. if (!expected.equals(actual)) {
  283. fail("Failure: Expected (" + codepoints(expected) + ") Actual (" + codepoints(actual) + ")");
  284. }
  285. }
  286. private List<String> codepoints(String str) {
  287. List<String> codepoints = new ArrayList<String>();
  288. for (int i = 0; i < str.length(); i++) {
  289. codepoints.add(Long.toHexString(str.charAt(i)));
  290. }
  291. return codepoints;
  292. }
  293. }