PageRenderTime 42ms CodeModel.GetById 12ms RepoModel.GetById 1ms app.codeStats 0ms

/branches/v1.6.6/Classes/PHPExcel/Shared/String.php

#
PHP | 270 lines | 95 code | 32 blank | 143 comment | 17 complexity | 01afb8ed4ec0c1a4aad696cb521a2866 MD5 | raw file
Possible License(s): AGPL-1.0, LGPL-2.0, LGPL-2.1, GPL-3.0, LGPL-3.0
  1. <?php
  2. /**
  3. * PHPExcel
  4. *
  5. * Copyright (c) 2006 - 2009 PHPExcel
  6. *
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * This library is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with this library; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. * @category PHPExcel
  22. * @package PHPExcel_Shared
  23. * @copyright Copyright (c) 2006 - 2009 PHPExcel (http://www.codeplex.com/PHPExcel)
  24. * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
  25. * @version ##VERSION##, ##DATE##
  26. */
  27. /**
  28. * PHPExcel_Shared_String
  29. *
  30. * @category PHPExcel
  31. * @package PHPExcel_Shared
  32. * @copyright Copyright (c) 2006 - 2009 PHPExcel (http://www.codeplex.com/PHPExcel)
  33. */
  class PHPExcel_Shared_String
  {
      /**
       * Lookup table of OOXML escape sequence => raw control character,
       * e.g. '_x0008_' => chr(8). Filled lazily by _buildControlCharacters().
       *
       * @var string[]
       */
      private static $_controlCharacters = array();

      /**
       * Cached result of the mbstring availability probe (unset until first queried).
       *
       * @var boolean
       */
      private static $_isMbstringEnabled;

      /**
       * Cached result of the iconv availability probe (unset until first queried).
       *
       * @var boolean
       */
      private static $_isIconvEnabled;

      /**
       * Build the control characters lookup table.
       *
       * Covers the whole C0 control range 0x00-0x1F. TAB (9), LF (10) and CR (13)
       * are skipped because they are the only characters below 0x20 that XML 1.0
       * allows literally, so they need no escaping.
       */
      private static function _buildControlCharacters()
      {
          // Upper bound is 31 (0x1F): stopping at 19 would leave 0x14-0x1F unescaped.
          for ($code = 0; $code <= 31; ++$code) {
              if ($code != 9 && $code != 10 && $code != 13) {
                  // _xHHHH_ with four upper-case, zero-padded hex digits.
                  $escaped = sprintf('_x%04X_', $code);
                  self::$_controlCharacters[$escaped] = chr($code);
              }
          }
      }

      /**
       * Get whether mbstring extension is available
       *
       * The probe (existence of mb_convert_encoding) runs once; the result is cached.
       *
       * @return boolean
       */
      public static function getIsMbstringEnabled()
      {
          if (!isset(self::$_isMbstringEnabled)) {
              self::$_isMbstringEnabled = function_exists('mb_convert_encoding');
          }

          return self::$_isMbstringEnabled;
      }

      /**
       * Get whether iconv extension is available
       *
       * The probe (existence of iconv) runs once; the result is cached.
       *
       * @return boolean
       */
      public static function getIsIconvEnabled()
      {
          if (!isset(self::$_isIconvEnabled)) {
              self::$_isIconvEnabled = function_exists('iconv');
          }

          return self::$_isIconvEnabled;
      }

      /**
       * Convert from OpenXML escaped control character to PHP control character
       *
       * Excel 2007 team:
       * ----------------
       * That's correct, control characters are stored directly in the shared-strings table.
       * We do encode characters that cannot be represented in XML using the following escape sequence:
       * _xHHHH_ where H represents a hexadecimal character in the character's value...
       * So you could end up with something like _x0008_ in a string (either in a cell value (<v>)
       * element or in the shared string <t> element.
       *
       * @param  string $value Value to unescape
       * @return string        Value with every known _xHHHH_ sequence replaced by its raw character
       */
      public static function ControlCharacterOOXML2PHP($value = '')
      {
          if (empty(self::$_controlCharacters)) {
              self::_buildControlCharacters();
          }

          return str_replace(
              array_keys(self::$_controlCharacters),
              array_values(self::$_controlCharacters),
              $value
          );
      }

      /**
       * Convert from PHP control character to OpenXML escaped control character
       *
       * Excel 2007 team:
       * ----------------
       * That's correct, control characters are stored directly in the shared-strings table.
       * We do encode characters that cannot be represented in XML using the following escape sequence:
       * _xHHHH_ where H represents a hexadecimal character in the character's value...
       * So you could end up with something like _x0008_ in a string (either in a cell value (<v>)
       * element or in the shared string <t> element.
       *
       * @param  string $value Value to escape
       * @return string        Value with every raw control character replaced by its _xHHHH_ sequence
       */
      public static function ControlCharacterPHP2OOXML($value = '')
      {
          if (empty(self::$_controlCharacters)) {
              self::_buildControlCharacters();
          }

          return str_replace(
              array_values(self::$_controlCharacters),
              array_keys(self::$_controlCharacters),
              $value
          );
      }

      /**
       * Check if a string contains UTF8 data
       *
       * Validation is delegated to PCRE: with the "u" modifier preg_match() fails
       * (returns false) when the subject is not well-formed UTF-8. A round trip
       * through utf8_decode()/utf8_encode() is not used because it only covers
       * ISO-8859-1 and therefore rejects valid UTF-8 such as the euro sign.
       *
       * @param  string  $value
       * @return boolean True when $value is empty or well-formed UTF-8
       */
      public static function IsUTF8($value = '')
      {
          return $value === '' || preg_match('/^./su', $value) === 1;
      }

      /**
       * Formats a numeric value as a string for output in various output writers
       *
       * The value is rendered with exactly two decimals, "." as decimal point
       * and no thousands separator.
       *
       * @param  mixed  $value
       * @return string
       */
      public static function FormatNumber($value)
      {
          return number_format($value, 2, '.', '');
      }

      /**
       * Converts a UTF-8 string into BIFF8 Unicode string data (8-bit string length)
       * Writes the string using uncompressed notation, no rich text, no Asian phonetics
       * If neither mbstring nor iconv is available, ASCII is assumed, and compressed notation is used
       * although this will give wrong results for non-ASCII strings
       * see OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3
       *
       * NOTE(review): the length is packed into a single unsigned byte, so this
       * presumably expects strings of at most 255 characters - confirm with callers.
       *
       * @param  string $value UTF-8 encoded string
       * @return string        Length byte, option-flag byte, then the character data
       */
      public static function UTF8toBIFF8UnicodeShort($value)
      {
          // character count
          $length = self::CountCharacters($value, 'UTF-8');

          // option flags: 0x0001 = uncompressed (UTF-16LE), only when a converter exists
          $flags = (self::getIsMbstringEnabled() || self::getIsIconvEnabled()) ? 0x0001 : 0x0000;

          // characters
          $characters = self::ConvertEncoding($value, 'UTF-16LE', 'UTF-8');

          return pack('CC', $length, $flags) . $characters;
      }

      /**
       * Converts a UTF-8 string into BIFF8 Unicode string data (16-bit string length)
       * Writes the string using uncompressed notation, no rich text, no Asian phonetics
       * If neither mbstring nor iconv is available, ASCII is assumed, and compressed notation is used
       * although this will give wrong results for non-ASCII strings
       * see OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3
       *
       * @param  string $value UTF-8 encoded string
       * @return string        16-bit little-endian length, option-flag byte, then the character data
       */
      public static function UTF8toBIFF8UnicodeLong($value)
      {
          // character count
          $length = self::CountCharacters($value, 'UTF-8');

          // option flags: 0x0001 = uncompressed (UTF-16LE), only when a converter exists
          $flags = (self::getIsMbstringEnabled() || self::getIsIconvEnabled()) ? 0x0001 : 0x0000;

          // characters
          $characters = self::ConvertEncoding($value, 'UTF-16LE', 'UTF-8');

          return pack('vC', $length, $flags) . $characters;
      }

      /**
       * Convert string from one encoding to another. First try mbstring, then iconv, or no conversion
       *
       * @param  string $value
       * @param  string $to   Encoding to convert to, e.g. 'UTF-8'
       * @param  string $from Encoding to convert from, e.g. 'UTF-16LE'
       * @return string       Converted string, or $value unchanged when no converter is available
       */
      public static function ConvertEncoding($value, $to, $from)
      {
          if (self::getIsMbstringEnabled()) {
              return mb_convert_encoding($value, $to, $from);
          }

          if (self::getIsIconvEnabled()) {
              // Note the argument order: iconv() takes the source encoding first.
              return iconv($from, $to, $value);
          }

          // else, no conversion
          return $value;
      }

      /**
       * Get character count. First try mbstring, then iconv, finally strlen
       *
       * The strlen() fallback counts bytes, so it is only accurate for
       * single-byte content.
       *
       * @param  string $value
       * @param  string $enc   Encoding
       * @return int           Character count
       */
      public static function CountCharacters($value, $enc)
      {
          if (self::getIsMbstringEnabled()) {
              return mb_strlen($value, $enc);
          }

          if (self::getIsIconvEnabled()) {
              return iconv_strlen($value, $enc);
          }

          // else strlen
          return strlen($value);
      }
  }