PageRenderTime 48ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/build/charset.php

http://github.com/simplepie/simplepie
PHP | 167 lines | 139 code | 18 blank | 10 comment | 19 complexity | 4ca1dad88e5d05f61a1407dab56e0357 MD5 | raw file
Possible License(s): LGPL-2.1
  1. <?php
  2. require_once '../autoloader.php';
  3. function normalize_character_set($charset)
  4. {
  5. return strtolower(preg_replace('/(?:[^a-zA-Z0-9]+|([^0-9])0+)/', '\1', $charset));
  6. }
  7. function build_character_set_list()
  8. {
  9. $file = new SimplePie_File('http://www.iana.org/assignments/character-sets');
  10. if (!$file->success && !($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
  11. {
  12. return false;
  13. }
  14. $data = explode("\n", $file->body);
  15. unset($file);
  16. foreach ($data as $line)
  17. {
  18. // New character set
  19. if (preg_match('/^Name:\s+(\S+)/', $line, $match))
  20. {
  21. // If we already have one, push it on to the array
  22. if (isset($aliases))
  23. {
  24. foreach ($aliases as &$alias)
  25. {
  26. $alias = normalize_character_set($alias);
  27. }
  28. $charsets[$preferred] = array_unique($aliases);
  29. natsort($charsets[$preferred]);
  30. }
  31. $aliases = array($match[1]);
  32. $preferred = $match[1];
  33. }
  34. // Another alias
  35. elseif (preg_match('/^Alias:\s+(\S+)(\s+\(preferred MIME name\))?\s*$/', $line, $match))
  36. {
  37. if ($match[1] !== 'None')
  38. {
  39. $aliases[] = $match[1];
  40. if (isset($match[2]))
  41. {
  42. $preferred = $match[1];
  43. }
  44. }
  45. }
  46. }
  47. // Compatibility replacements
  48. // From http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#misinterpreted-for-compatibility
  49. $compat = array(
  50. 'EUC-KR' => 'windows-949',
  51. 'GB2312' => 'GBK',
  52. 'GB_2312-80' => 'GBK',
  53. 'ISO-8859-1' => 'windows-1252',
  54. 'ISO-8859-9' => 'windows-1254',
  55. 'ISO-8859-11' => 'windows-874',
  56. 'KS_C_5601-1987' => 'windows-949',
  57. 'Shift_JIS' => 'Windows-31J',
  58. 'TIS-620' => 'windows-874',
  59. //'US-ASCII' => 'windows-1252',
  60. );
  61. foreach ($compat as $real => $replace)
  62. {
  63. if (isset($charsets[$real]) && isset($charsets[$replace]))
  64. {
  65. $charsets[$replace] = array_merge($charsets[$replace], $charsets[$real]);
  66. unset($charsets[$real]);
  67. }
  68. elseif (isset($charsets[$real]))
  69. {
  70. $charsets[$replace] = $charsets[$real];
  71. $charsets[$replace][] = normalize_character_set($replace);
  72. unset($charsets[$real]);
  73. }
  74. else
  75. {
  76. $charsets[$replace][] = normalize_character_set($real);
  77. }
  78. $charsets[$replace] = array_unique($charsets[$replace]);
  79. natsort($charsets[$replace]);
  80. }
  81. // Sort it
  82. uksort($charsets, 'strnatcasecmp');
  83. // Check that nothing matches more than one
  84. $all = call_user_func_array('array_merge', $charsets);
  85. $all_count = array_count_values($all);
  86. if (max($all_count) > 1)
  87. {
  88. echo "Duplicated charsets:\n";
  89. foreach ($all_count as $charset => $count)
  90. {
  91. if ($count > 1)
  92. {
  93. echo "$charset\n";
  94. }
  95. }
  96. }
  97. // And we're done!
  98. return $charsets;
  99. }
  100. function charset($charset)
  101. {
  102. $normalized_charset = normalize_character_set($charset);
  103. if ($charsets = build_character_set_list())
  104. {
  105. foreach ($charsets as $preferred => $aliases)
  106. {
  107. if (in_array($normalized_charset, $aliases))
  108. {
  109. return $preferred;
  110. }
  111. }
  112. return $charset;
  113. }
  114. return false;
  115. }
  116. function build_function()
  117. {
  118. if ($charsets = build_character_set_list())
  119. {
  120. $return = <<<EOF
  121. public static function encoding(\$charset)
  122. {
  123. // Normalization from UTS #22
  124. switch (strtolower(preg_replace('/(?:[^a-zA-Z0-9]+|([^0-9])0+)/', '\\1', \$charset)))
  125. {
  126. EOF;
  127. foreach ($charsets as $preferred => $aliases)
  128. {
  129. foreach ($aliases as $alias)
  130. {
  131. $return .= "\t\tcase " . var_export($alias, true) . ":\n";
  132. }
  133. $return .= "\t\t\treturn " . var_export($preferred, true) . ";\n\n";
  134. }
  135. $return .= <<<EOF
  136. default:
  137. return \$charset;
  138. }
  139. }
  140. EOF;
  141. return $return;
  142. }
  143. return false;
  144. }
  145. if (php_sapi_name() === 'cli' && realpath($_SERVER['argv'][0]) === __FILE__)
  146. {
  147. echo build_function();
  148. }
  149. ?>