PageRenderTime 57ms CodeModel.GetById 26ms RepoModel.GetById 1ms app.codeStats 0ms

/hphp/runtime/base/string-util.cpp

https://github.com/tstarling/hiphop-php
C++ | 461 lines | 374 code | 59 blank | 28 comment | 73 complexity | f756ec2128e55c86644adc70114721bf MD5 | raw file
  1. /*
  2. +----------------------------------------------------------------------+
  3. | HipHop for PHP |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. */
  16. #include "hphp/runtime/base/string-util.h"
  17. #include <algorithm>
  18. #include <vector>
  19. #include "hphp/zend/zend-html.h"
  20. #include "hphp/runtime/base/zend-string.h"
  21. #include "hphp/runtime/base/zend-url.h"
  22. #include "hphp/runtime/base/runtime-error.h"
  23. #include "hphp/runtime/base/array-iterator.h"
  24. #include "hphp/runtime/base/builtin-functions.h"
  25. #include "hphp/runtime/base/container-functions.h"
  26. namespace HPHP {
  27. ///////////////////////////////////////////////////////////////////////////////
  28. // manipulations
  29. String StringUtil::Pad(const String& input, int final_length,
  30. const String& pad_string /* = " " */,
  31. PadType type /* = PadType::Right */) {
  32. int len = input.size();
  33. char *ret = string_pad(input.data(), len, final_length, pad_string.data(),
  34. pad_string.size(), static_cast<int>(type));
  35. if (ret) return String(ret, len, AttachString);
  36. return String();
  37. }
  38. String StringUtil::StripHTMLTags(const String& input,
  39. const String& allowable_tags /* = "" */) {
  40. if (input.empty()) return input;
  41. int len = input.size();
  42. char *ret = string_strip_tags(input.data(), len, allowable_tags.data(),
  43. allowable_tags.size(), false);
  44. return String(ret, len, AttachString);
  45. }
  46. ///////////////////////////////////////////////////////////////////////////////
  47. // splits/joins
  48. Variant StringUtil::Explode(const String& input, const String& delimiter,
  49. int limit /* = 0x7FFFFFFF */) {
  50. if (delimiter.empty()) {
  51. throw_invalid_argument("delimiter: (empty)");
  52. return false;
  53. }
  54. Array ret(Array::Create());
  55. if (input.empty()) {
  56. if (limit >= 0) {
  57. ret.append("");
  58. }
  59. return ret;
  60. }
  61. if (limit > 1) {
  62. int pos = input.find(delimiter);
  63. if (pos < 0) {
  64. ret.append(input);
  65. } else {
  66. int len = delimiter.size();
  67. int pos0 = 0;
  68. do {
  69. ret.append(input.substr(pos0, pos - pos0));
  70. pos += len;
  71. pos0 = pos;
  72. } while ((pos = input.find(delimiter, pos)) >= 0 && --limit > 1);
  73. if (pos0 <= input.size()) {
  74. ret.append(input.substr(pos0));
  75. }
  76. }
  77. } else if (limit < 0) {
  78. int pos = input.find(delimiter);
  79. if (pos >= 0) {
  80. std::vector<int> positions;
  81. int len = delimiter.size();
  82. int pos0 = 0;
  83. int found = 0;
  84. do {
  85. positions.push_back(pos0);
  86. positions.push_back(pos - pos0);
  87. pos += len;
  88. pos0 = pos;
  89. found++;
  90. } while ((pos = input.find(delimiter, pos)) >= 0);
  91. if (pos0 <= input.size()) {
  92. positions.push_back(pos0);
  93. positions.push_back(input.size() - pos0);
  94. found++;
  95. }
  96. int iMax = (found + limit) << 1;
  97. for (int i = 0; i < iMax; i += 2) {
  98. ret.append(input.substr(positions[i], positions[i+1]));
  99. }
  100. } // else we have negative limit and delimiter not found
  101. } else {
  102. ret.append(input);
  103. }
  104. return ret;
  105. }
  106. String StringUtil::Implode(const Variant& items, const String& delim) {
  107. if (!isContainer(items)) {
  108. throw_param_is_not_container();
  109. }
  110. int size = getContainerSize(items);
  111. if (size == 0) return "";
  112. String* sitems = (String*)smart_malloc(size * sizeof(String));
  113. int len = 0;
  114. int lenDelim = delim.size();
  115. int i = 0;
  116. for (ArrayIter iter(items); iter; ++iter) {
  117. new (&sitems[i]) String(iter.second().toString());
  118. len += sitems[i].size() + lenDelim;
  119. i++;
  120. }
  121. len -= lenDelim; // always one delimiter less than count of items
  122. assert(i == size);
  123. String s = String(len, ReserveString);
  124. char *buffer = s.bufferSlice().ptr;
  125. const char *sdelim = delim.data();
  126. char *p = buffer;
  127. for (int i = 0; i < size; i++) {
  128. String &item = sitems[i];
  129. if (i && lenDelim) {
  130. memcpy(p, sdelim, lenDelim);
  131. p += lenDelim;
  132. }
  133. int lenItem = item.size();
  134. if (lenItem) {
  135. memcpy(p, item.data(), lenItem);
  136. p += lenItem;
  137. }
  138. sitems[i].~String();
  139. }
  140. smart_free(sitems);
  141. assert(p - buffer == len);
  142. return s.setSize(len);
  143. }
  144. Variant StringUtil::Split(const String& str, int split_length /* = 1 */) {
  145. if (split_length <= 0) {
  146. throw_invalid_argument(
  147. "The length of each segment must be greater than zero"
  148. );
  149. return false;
  150. }
  151. Array ret;
  152. int len = str.size();
  153. if (split_length >= len) {
  154. ret.append(str);
  155. } else {
  156. for (int i = 0; i < len; i += split_length) {
  157. ret.append(str.substr(i, split_length));
  158. }
  159. }
  160. return ret;
  161. }
  162. Variant StringUtil::ChunkSplit(const String& body, int chunklen /* = 76 */,
  163. const String& end /* = "\r\n" */) {
  164. if (chunklen <= 0) {
  165. throw_invalid_argument("chunklen: (non-positive)");
  166. return false;
  167. }
  168. String ret;
  169. int len = body.size();
  170. if (chunklen >= len) {
  171. ret = body;
  172. ret += end;
  173. } else {
  174. char *chunked = string_chunk_split(body.data(), len, end.c_str(),
  175. end.size(), chunklen);
  176. return String(chunked, len, AttachString);
  177. }
  178. return ret;
  179. }
  180. ///////////////////////////////////////////////////////////////////////////////
  181. // encoding/decoding
  182. String StringUtil::HtmlEncode(const String& input, QuoteStyle quoteStyle,
  183. const char *charset, bool dEncode, bool htmlEnt) {
  184. return HtmlEncode(input, static_cast<int64_t>(quoteStyle),
  185. charset, dEncode, htmlEnt);
  186. }
  187. String StringUtil::HtmlEncode(const String& input, const int64_t qsBitmask,
  188. const char *charset, bool dEncode, bool htmlEnt) {
  189. if (input.empty()) return input;
  190. assert(charset);
  191. bool utf8 = true;
  192. if (strcasecmp(charset, "ISO-8859-1") == 0) {
  193. utf8 = false;
  194. } else if (strcasecmp(charset, "UTF-8")) {
  195. throw NotImplementedException(charset);
  196. }
  197. int len = input.size();
  198. char *ret = string_html_encode(input.data(), len,
  199. qsBitmask, utf8, dEncode, htmlEnt);
  200. if (!ret) {
  201. return empty_string;
  202. }
  203. return String(ret, len, AttachString);
  204. }
  // Helpers for building the two 64-bit words of an AsciiMap initializer.
  // A1 ORs bit (ch & 63) into `v` only when bit 6 of `ch` is clear (codes
  // 0-63); A2 does the same only when bit 6 is set (codes 64-127). Each map
  // below uses A1 for its first word and A2 for its second, matching the
  // `map[c & 64 ? 1 : 0]` indexing used by HtmlEncodeExtra().
  205. #define A1(v, ch) ((v)|((ch) & 64 ? 0 : 1uLL<<((ch)&63)))
  206. #define A2(v, ch) ((v)|((ch) & 64 ? 1uLL<<((ch)&63) : 0))
  // Flags '<', '>', '&', '{', '}' and '@'; neither quote character.
  207. static const AsciiMap mapNoQuotes = {
  208. { A1(A1(A1(A1(A1(A1(0, '<'), '>'), '&'), '{'), '}'), '@'),
  209. A2(A2(A2(A2(A2(A2(0, '<'), '>'), '&'), '{'), '}'), '@') }
  210. };
  // Same set as mapNoQuotes plus the double quote.
  211. static const AsciiMap mapDoubleQuotes = {
  212. { A1(A1(A1(A1(A1(A1(A1(0, '<'), '>'), '&'), '{'), '}'), '@'), '"'),
  213. A2(A2(A2(A2(A2(A2(A2(0, '<'), '>'), '&'), '{'), '}'), '@'), '"') }
  214. };
  // Same set as mapDoubleQuotes plus the single quote.
  215. static const AsciiMap mapBothQuotes = {
  216. { A1(A1(A1(A1(A1(A1(A1(A1(0, '<'), '>'), '&'), '{'), '}'), '@'), '"'), '\''),
  217. A2(A2(A2(A2(A2(A2(A2(A2(0, '<'), '>'), '&'), '{'), '}'), '@'), '"'), '\'') }
  218. };
  // Empty-initialized map: no character is flagged.
  219. static const AsciiMap mapNothing = {};
  220. String StringUtil::HtmlEncodeExtra(const String& input, QuoteStyle quoteStyle,
  221. const char *charset, bool nbsp,
  222. Array extra) {
  223. if (input.empty()) return input;
  224. assert(charset);
  225. int flags = STRING_HTML_ENCODE_UTF8;
  226. if (nbsp) {
  227. flags |= STRING_HTML_ENCODE_NBSP;
  228. }
  229. if (RuntimeOption::Utf8izeReplace) {
  230. flags |= STRING_HTML_ENCODE_UTF8IZE_REPLACE;
  231. }
  232. if (!*charset || strcasecmp(charset, "UTF-8") == 0) {
  233. } else if (strcasecmp(charset, "ISO-8859-1") == 0) {
  234. flags &= ~STRING_HTML_ENCODE_UTF8;
  235. } else {
  236. throw NotImplementedException(charset);
  237. }
  238. const AsciiMap *am;
  239. AsciiMap tmp;
  240. switch (quoteStyle) {
  241. case QuoteStyle::FBUtf8Only:
  242. am = &mapNothing;
  243. flags |= STRING_HTML_ENCODE_HIGH;
  244. break;
  245. case QuoteStyle::FBUtf8:
  246. am = &mapBothQuotes;
  247. flags |= STRING_HTML_ENCODE_HIGH;
  248. break;
  249. case QuoteStyle::Both:
  250. am = &mapBothQuotes;
  251. break;
  252. case QuoteStyle::Double:
  253. am = &mapDoubleQuotes;
  254. break;
  255. case QuoteStyle::No:
  256. am = &mapNoQuotes;
  257. break;
  258. default:
  259. am = &mapNothing;
  260. raise_error("Unknown quote style: %d", (int)quoteStyle);
  261. }
  262. if (quoteStyle != QuoteStyle::FBUtf8Only && extra.toBoolean()) {
  263. tmp = *am;
  264. am = &tmp;
  265. for (ArrayIter iter(extra); iter; ++iter) {
  266. String item = iter.second().toString();
  267. char c = item.data()[0];
  268. tmp.map[c & 64 ? 1 : 0] |= 1uLL << (c & 63);
  269. }
  270. }
  271. int len = input.size();
  272. char *ret = string_html_encode_extra(input.data(), len,
  273. (StringHtmlEncoding)flags, am);
  274. if (!ret) {
  275. raise_error("HtmlEncode called on too large input (%d)", len);
  276. }
  277. return String(ret, len, AttachString);
  278. }
  279. String StringUtil::HtmlDecode(const String& input, QuoteStyle quoteStyle,
  280. const char *charset, bool all) {
  281. if (input.empty()) return input;
  282. assert(charset);
  283. int len = input.size();
  284. char *ret = string_html_decode(input.data(), len,
  285. quoteStyle != QuoteStyle::No,
  286. quoteStyle == QuoteStyle::Both,
  287. charset, all);
  288. if (!ret) {
  289. // null iff charset was not recognized
  290. throw NotImplementedException(charset);
  291. // (charset is not null, see assertion above)
  292. }
  293. return String(ret, len, AttachString);
  294. }
  295. String StringUtil::QuotedPrintableEncode(const String& input) {
  296. if (input.empty()) return input;
  297. int len = input.size();
  298. char *ret = string_quoted_printable_encode(input.data(), len);
  299. return String(ret, len, AttachString);
  300. }
  301. String StringUtil::QuotedPrintableDecode(const String& input) {
  302. if (input.empty()) return input;
  303. int len = input.size();
  304. char *ret = string_quoted_printable_decode(input.data(), len, false);
  305. return String(ret, len, AttachString);
  306. }
  307. String StringUtil::UUEncode(const String& input) {
  308. if (input.empty()) return input;
  309. int len;
  310. char *encoded = string_uuencode(input.data(), input.size(), len);
  311. return String(encoded, len, AttachString);
  312. }
  313. String StringUtil::UUDecode(const String& input) {
  314. if (!input.empty()) {
  315. int len;
  316. char *decoded = string_uudecode(input.data(), input.size(), len);
  317. if (decoded) {
  318. return String(decoded, len, AttachString);
  319. }
  320. }
  321. return String();
  322. }
  323. String StringUtil::Base64Encode(const String& input) {
  324. int len = input.size();
  325. char *ret = string_base64_encode(input.data(), len);
  326. return String(ret, len, AttachString);
  327. }
  328. String StringUtil::Base64Decode(const String& input,
  329. bool strict /* = false */) {
  330. int len = input.size();
  331. char *ret = string_base64_decode(input.data(), len, strict);
  332. return String(ret, len, AttachString);
  333. }
  334. String StringUtil::UrlEncode(const String& input,
  335. bool encodePlus /* = true */) {
  336. int len = input.size();
  337. char *ret;
  338. if (encodePlus) {
  339. ret = url_encode(input.data(), len);
  340. } else {
  341. ret = url_raw_encode(input.data(), len);
  342. }
  343. return String(ret, len, AttachString);
  344. }
  345. String StringUtil::UrlDecode(const String& input,
  346. bool decodePlus /* = true */) {
  347. int len = input.size();
  348. char *ret;
  349. if (decodePlus) {
  350. ret = url_decode(input.data(), len);
  351. } else {
  352. ret = url_raw_decode(input.data(), len);
  353. }
  354. return String(ret, len, AttachString);
  355. }
  356. ///////////////////////////////////////////////////////////////////////////////
  357. // formatting
  358. String StringUtil::MoneyFormat(const char *format, double value) {
  359. assert(format);
  360. char *formatted = string_money_format(format, value);
  361. return formatted ? String(formatted, AttachString) : String();
  362. }
  363. ///////////////////////////////////////////////////////////////////////////////
  364. // hashing
  365. String StringUtil::Translate(const String& input, const String& from,
  366. const String& to) {
  367. if (input.empty()) return input;
  368. int len = input.size();
  369. String retstr(len, ReserveString);
  370. char *ret = retstr.bufferSlice().ptr;
  371. memcpy(ret, input.data(), len);
  372. auto trlen = std::min(from.size(), to.size());
  373. string_translate(ret, len, from.data(), to.data(), trlen);
  374. return retstr.setSize(len);
  375. }
  376. String StringUtil::ROT13(const String& input) {
  377. if (input.empty()) return input;
  378. return String(string_rot13(input.data(), input.size()),
  379. input.size(), AttachString);
  380. }
  381. int64_t StringUtil::CRC32(const String& input) {
  382. return string_crc32(input.data(), input.size());
  383. }
  384. String StringUtil::Crypt(const String& input, const char *salt /* = "" */) {
  385. return String(string_crypt(input.c_str(), salt), AttachString);
  386. }
  387. String StringUtil::MD5(const String& input, bool raw /* = false */) {
  388. Md5Digest md5(input.data(), input.size());
  389. auto const rawLen = sizeof(md5.digest);
  390. if (raw) return String((char*)md5.digest, rawLen, CopyString);
  391. auto const hexLen = rawLen * 2;
  392. String hex(hexLen, ReserveString);
  393. string_bin2hex((char*)md5.digest, rawLen, hex.bufferSlice().ptr);
  394. return hex.setSize(hexLen);
  395. }
  396. String StringUtil::SHA1(const String& input, bool raw /* = false */) {
  397. int len;
  398. char *ret = string_sha1(input.data(), input.size(), raw, len);
  399. return String(ret, len, AttachString);
  400. }
  401. ///////////////////////////////////////////////////////////////////////////////
  402. }