/hphp/test/zend/bad/ext/standard/tests/strings/bug49785.php

https://github.com/tstarling/hiphop-php · PHP · 136 lines · 125 code · 10 blank · 1 comment · 0 complexity · c8b2536a1920cbcf467f96e3c94272e7 MD5 · raw file

  1. <?php
  2. function _bin2hex($val) {
  3. return is_string($val) ? bin2hex($val): $val;
  4. }
  5. echo "UTF-8: basic tests\n";
  6. var_dump(_bin2hex(htmlentities("\xc1\xbf", ENT_QUOTES, "UTF-8")));
  7. var_dump(_bin2hex(htmlentities("\xc2\x80", ENT_QUOTES, "UTF-8")));
  8. var_dump(_bin2hex(htmlentities("\xc2\x00", ENT_QUOTES, "UTF-8")));
  9. var_dump(_bin2hex(htmlentities("\xc2\xc0", ENT_QUOTES, "UTF-8")));
  10. var_dump(_bin2hex(htmlentities("\xce\x91", ENT_QUOTES, "UTF-8")));
  11. var_dump(_bin2hex(htmlentities("\xce\xb1", ENT_QUOTES, "UTF-8")));
  12. var_dump(_bin2hex(htmlentities("\xdf\xbf", ENT_QUOTES, "UTF-8")));
  13. var_dump(_bin2hex(htmlentities("\xe0\xa0\x80", ENT_QUOTES, "UTF-8")));
  14. var_dump(_bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
  15. var_dump(_bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
  16. var_dump(_bin2hex(htmlentities("\xe0\x1f\xbf", ENT_QUOTES, "UTF-8")));
  17. var_dump(_bin2hex(htmlentities("\xe0\x9f\x3f", ENT_QUOTES, "UTF-8")));
  18. var_dump(_bin2hex(htmlentities("\xe0\x1f\x3f", ENT_QUOTES, "UTF-8")));
  19. var_dump(_bin2hex(htmlentities("\xe2\x99\xa5", ENT_QUOTES, "UTF-8")));
  20. var_dump(_bin2hex(htmlentities("\xef\xbf\xbf", ENT_QUOTES, "UTF-8")));
  21. var_dump(_bin2hex(htmlentities("\xef\xff\xbf", ENT_QUOTES, "UTF-8")));
  22. var_dump(_bin2hex(htmlentities("\xef\xbf\xff", ENT_QUOTES, "UTF-8")));
  23. var_dump(_bin2hex(htmlentities("\xf0\x8f\xbf\xbf", ENT_QUOTES, "UTF-8")));
  24. var_dump(_bin2hex(htmlentities("\xf0\x90\x80\x80", ENT_QUOTES, "UTF-8")));
  25. var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xbf", ENT_QUOTES, "UTF-8")));
  26. var_dump(_bin2hex(htmlentities("\xf7\x3f\xbf\xbf", ENT_QUOTES, "UTF-8")));
  27. var_dump(_bin2hex(htmlentities("\xf7\xbf\x3f\xbf", ENT_QUOTES, "UTF-8")));
  28. var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\x3f", ENT_QUOTES, "UTF-8")));
  29. var_dump(_bin2hex(htmlentities("\xf7\xff\xbf\xbf", ENT_QUOTES, "UTF-8")));
  30. var_dump(_bin2hex(htmlentities("\xf7\xbf\xff\xbf", ENT_QUOTES, "UTF-8")));
  31. var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xff", ENT_QUOTES, "UTF-8")));
  32. var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));
  33. echo "--\n";
  34. echo "UTF-8: with ENT_IGNORE\n";
  35. var_dump(_bin2hex(htmlentities("\xc0\xa0\xc2\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
  36. var_dump(_bin2hex(htmlentities("\xe0\x80\x80\xe0\xa0\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
  37. var_dump(_bin2hex(htmlentities("\xf0\x80\x80\x80\xf0\x90\x80\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
  38. echo "--\n";
  39. echo "UTF-8: alternative (invalid) UTF-8 sequence / surrogate pairs\n";
  40. var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8')));
  41. var_dump(_bin2hex(htmlspecialchars("\xe0\x80\xa6", ENT_QUOTES, 'UTF-8')));
  42. var_dump(_bin2hex(htmlspecialchars("\xf0\x80\x80\xa6", ENT_QUOTES, 'UTF-8')));
  43. var_dump(_bin2hex(htmlspecialchars("\xec\xbf\xbf", ENT_QUOTES, 'UTF-8')));
  44. var_dump(_bin2hex(htmlspecialchars("\xed\xa0\x80", ENT_QUOTES, 'UTF-8')));
  45. var_dump(_bin2hex(htmlspecialchars("\xed\xbf\xbf", ENT_QUOTES, 'UTF-8')));
  46. var_dump(_bin2hex(htmlspecialchars("\xee\x80\x80", ENT_QUOTES, 'UTF-8')));
  47. echo "--\n";
  48. echo "Shift_JIS: non-lead byte >= 0x80\n";
  49. var_dump(_bin2hex(htmlspecialchars("\x80", ENT_QUOTES, 'Shift_JIS')));
  50. foreach (array_map('chr', range(0xa0, 0xdf)) as $c) {
  51. var_dump(_bin2hex(htmlspecialchars($c, ENT_QUOTES, 'Shift_JIS')));
  52. }
  53. var_dump(_bin2hex(htmlspecialchars("\xfd", ENT_QUOTES, 'Shift_JIS')));
  54. var_dump(_bin2hex(htmlspecialchars("\xfe", ENT_QUOTES, 'Shift_JIS')));
  55. var_dump(_bin2hex(htmlspecialchars("\xff", ENT_QUOTES, 'Shift_JIS')));
  56. echo "--\n";
  57. echo "Shift_JIS: incomplete / invalid multibyte sequences\n";
  58. foreach (array_map('chr', array_merge(range(0x81, 0x9f), range(0xe0, 0xfc))) as $c) {
  59. var_dump(_bin2hex(htmlspecialchars("$c", ENT_QUOTES, 'Shift_JIS')));
  60. var_dump(_bin2hex(htmlspecialchars("$c\x3f", ENT_QUOTES, 'Shift_JIS')));
  61. var_dump(_bin2hex(htmlspecialchars("$c\x40", ENT_QUOTES, 'Shift_JIS')));
  62. var_dump(_bin2hex(htmlspecialchars("$c\x7e", ENT_QUOTES, 'Shift_JIS')));
  63. var_dump(_bin2hex(htmlspecialchars("$c\x7f", ENT_QUOTES, 'Shift_JIS')));
  64. var_dump(_bin2hex(htmlspecialchars("$c\x80", ENT_QUOTES, 'Shift_JIS')));
  65. var_dump(_bin2hex(htmlspecialchars("$c\xfc", ENT_QUOTES, 'Shift_JIS')));
  66. var_dump(_bin2hex(htmlspecialchars("$c\xfd", ENT_QUOTES, 'Shift_JIS')));
  67. var_dump(_bin2hex(htmlspecialchars("$c\xfe", ENT_QUOTES, 'Shift_JIS')));
  68. var_dump(_bin2hex(htmlspecialchars("$c\xff", ENT_QUOTES, 'Shift_JIS')));
  69. }
  70. echo "--\n";
  71. echo "EUC-JP: non-lead byte >= 0x80\n";
  72. foreach (array_map('chr', array_merge(range(0x80, 0x8d), range(0x90, 0x9f))) as $c) {
  73. var_dump(_bin2hex(htmlspecialchars($c, ENT_QUOTES, 'EUC-JP')));
  74. }
  75. var_dump(_bin2hex(htmlspecialchars("\xff", ENT_QUOTES, 'EUC-JP')));
  76. // EUC-JP: control codes that are virtually lead bytes
  77. var_dump(_bin2hex(htmlspecialchars("\x8e", ENT_QUOTES, 'EUC-JP')));
  78. var_dump(_bin2hex(htmlspecialchars("\x8f", ENT_QUOTES, 'EUC-JP')));
  79. var_dump(_bin2hex(htmlspecialchars("\x8e\xa1", ENT_QUOTES, 'EUC-JP')));
  80. var_dump(_bin2hex(htmlspecialchars("\x8f\xa1", ENT_QUOTES, 'EUC-JP')));
  81. var_dump(_bin2hex(htmlspecialchars("\x8e\xa1\xa3", ENT_QUOTES, 'EUC-JP')));
  82. var_dump(_bin2hex(htmlspecialchars("\x8f\xa1\xa3", ENT_QUOTES, 'EUC-JP')));
  83. echo "--\n";
  84. echo "EUC-JP: incomplete / invalid multibyte sequences\n";
  85. foreach (array_map('chr', array_merge(range(0xa1, 0xfe))) as $c) {
  86. var_dump(_bin2hex(htmlspecialchars("$c", ENT_QUOTES, 'EUC-JP')));
  87. var_dump(_bin2hex(htmlspecialchars("$c\x26", ENT_QUOTES, 'EUC-JP')));
  88. var_dump(_bin2hex(htmlspecialchars("$c\x80", ENT_QUOTES, 'EUC-JP')));
  89. var_dump(_bin2hex(htmlspecialchars("$c\xa0", ENT_QUOTES, 'EUC-JP')));
  90. var_dump(_bin2hex(htmlspecialchars("$c\xa1", ENT_QUOTES, 'EUC-JP')));
  91. var_dump(_bin2hex(htmlspecialchars("$c\xfe", ENT_QUOTES, 'EUC-JP')));
  92. var_dump(_bin2hex(htmlspecialchars("$c\xff", ENT_QUOTES, 'EUC-JP')));
  93. var_dump(_bin2hex(htmlspecialchars("\x8e$c", ENT_QUOTES, 'EUC-JP')));
  94. var_dump(_bin2hex(htmlspecialchars("\x8e$c\x26", ENT_QUOTES, 'EUC-JP')));
  95. var_dump(_bin2hex(htmlspecialchars("\x8e$c\x80", ENT_QUOTES, 'EUC-JP')));
  96. var_dump(_bin2hex(htmlspecialchars("\x8e$c\xa0", ENT_QUOTES, 'EUC-JP')));
  97. var_dump(_bin2hex(htmlspecialchars("\x8e$c\xa1", ENT_QUOTES, 'EUC-JP')));
  98. var_dump(_bin2hex(htmlspecialchars("\x8e$c\xfe", ENT_QUOTES, 'EUC-JP')));
  99. var_dump(_bin2hex(htmlspecialchars("\x8e$c\xff", ENT_QUOTES, 'EUC-JP')));
  100. var_dump(_bin2hex(htmlspecialchars("\x8f$c", ENT_QUOTES, 'EUC-JP')));
  101. var_dump(_bin2hex(htmlspecialchars("\x8f$c\x26", ENT_QUOTES, 'EUC-JP')));
  102. var_dump(_bin2hex(htmlspecialchars("\x8f$c\x80", ENT_QUOTES, 'EUC-JP')));
  103. var_dump(_bin2hex(htmlspecialchars("\x8f$c\xa0", ENT_QUOTES, 'EUC-JP')));
  104. var_dump(_bin2hex(htmlspecialchars("\x8f$c\xa1", ENT_QUOTES, 'EUC-JP')));
  105. var_dump(_bin2hex(htmlspecialchars("\x8f$c\xfe", ENT_QUOTES, 'EUC-JP')));
  106. var_dump(_bin2hex(htmlspecialchars("\x8f$c\xff", ENT_QUOTES, 'EUC-JP')));
  107. }
  108. echo "--\n";
  109. echo "BIG5: non-lead byte >= 0x80\n";
  110. var_dump(_bin2hex(htmlspecialchars("\x80", ENT_QUOTES, 'BIG5')));
  111. var_dump(_bin2hex(htmlspecialchars("\xff", ENT_QUOTES, 'BIG5')));
  112. echo "--\n";
  113. echo "BIG5: incomplete / invalid multibyte sequences\n";
  114. foreach (array_map('chr', range(0x81, 0xfe)) as $c) {
  115. var_dump(_bin2hex(htmlspecialchars("$c", ENT_QUOTES, 'BIG5')));
  116. var_dump(_bin2hex(htmlspecialchars("$c\x3f", ENT_QUOTES, 'BIG5')));
  117. var_dump(_bin2hex(htmlspecialchars("$c\x40", ENT_QUOTES, 'BIG5')));
  118. var_dump(_bin2hex(htmlspecialchars("$c\x7e", ENT_QUOTES, 'BIG5')));
  119. var_dump(_bin2hex(htmlspecialchars("$c\x7f", ENT_QUOTES, 'BIG5')));
  120. var_dump(_bin2hex(htmlspecialchars("$c\x80", ENT_QUOTES, 'BIG5')));
  121. var_dump(_bin2hex(htmlspecialchars("$c\xa0", ENT_QUOTES, 'BIG5')));
  122. var_dump(_bin2hex(htmlspecialchars("$c\xa1", ENT_QUOTES, 'BIG5')));
  123. var_dump(_bin2hex(htmlspecialchars("$c\xfe", ENT_QUOTES, 'BIG5')));
  124. var_dump(_bin2hex(htmlspecialchars("$c\xff", ENT_QUOTES, 'BIG5')));
  125. }
  126. ?>