PageRenderTime 26ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/hphp/runtime/ext/icu/ext_icu_collator.cpp

https://gitlab.com/Blueprint-Marketing/hhvm
C++ | 369 lines | 304 code | 49 blank | 16 comment | 31 complexity | 1f4a6b2968d804cf5ed15accd22dea28 MD5 | raw file
  1. #include "hphp/runtime/ext/icu/ext_icu_collator.h"
  2. #include "hphp/runtime/base/builtin-functions.h"
  3. #include "hphp/runtime/base/zend-collator.h"
  4. #include "hphp/runtime/base/zend-qsort.h"
  5. namespace HPHP { namespace Intl {
  6. /////////////////////////////////////////////////////////////////////////////
  7. // class Collator
  // Sort modes exposed to user code as Collator::SORT_REGULAR,
  // Collator::SORT_STRING and Collator::SORT_NUMERIC (see CONST_SORT).
  enum CollatorSort {
    SORT_REGULAR = 0,   // value 0
    SORT_STRING  = 1,   // value 1
    SORT_NUMERIC = 2,   // value 2
  };
  13. const StaticString s_Collator("Collator");
  14. #define FETCH_COL(dest, src, ret) \
  15. auto dest = Collator::Get(src); \
  16. if (!dest) { \
  17. raise_recoverable_error("Collator not initialized"); \
  18. return ret; \
  19. }
  20. static void HHVM_METHOD(Collator, __construct, const String& locale) {
  21. auto data = Native::data<Collator>(this_);
  22. data->clearError();
  23. if (!locale.empty()) {
  24. UErrorCode error = U_ZERO_ERROR;
  25. data->setCollator(ucol_open(locale.c_str(), &error));
  26. if (U_SUCCESS(error)) {
  27. return;
  28. }
  29. /* Fallthrough and use default collator */
  30. }
  31. data->setError(U_USING_FALLBACK_WARNING);
  32. UErrorCode error = U_ZERO_ERROR;
  33. data->setCollator(ucol_open(uloc_getDefault(), &error));
  34. if (U_FAILURE(error)) {
  35. data->setError(error, "collator_create: unable to open ICU collator");
  36. data->setCollator(nullptr);
  37. return;
  38. }
  39. }
  40. static bool HHVM_METHOD(Collator, asort, VRefParam arr, int64_t flag) {
  41. FETCH_COL(data, this_, false);
  42. if (!arr.isArray()) {
  43. throw_expected_array_exception("Collator::asort");
  44. return false;
  45. }
  46. data->clearError();
  47. bool ret = collator_asort(arr, flag, true, data->collator(), data);
  48. if (U_FAILURE(data->getErrorCode())) {
  49. return false;
  50. }
  51. return ret;
  52. }
  53. static Variant HHVM_METHOD(Collator, compare, const Variant& str1, const Variant& str2) {
  54. FETCH_COL(data, this_, false);
  55. data->clearError();
  56. UErrorCode error = U_ZERO_ERROR;
  57. icu::UnicodeString ustr1(u16(str1.toString(), error));
  58. if (U_FAILURE(error)) {
  59. data->setError(error);
  60. return false;
  61. }
  62. error = U_ZERO_ERROR;
  63. icu::UnicodeString ustr2(u16(str2.toString(), error));
  64. if (U_FAILURE(error)) {
  65. data->setError(error);
  66. return false;
  67. }
  68. return (int64_t)ucol_strcoll(data->collator(),
  69. ustr1.getBuffer(), ustr1.length(),
  70. ustr2.getBuffer(), ustr2.length());
  71. }
  72. static int64_t HHVM_METHOD(Collator, getAttribute, int64_t attr) {
  73. FETCH_COL(data, this_, 0);
  74. data->clearError();
  75. UErrorCode error = U_ZERO_ERROR;
  76. int64_t ret = (int64_t)ucol_getAttribute(data->collator(),
  77. (UColAttribute)attr,
  78. &error);
  79. if (U_FAILURE(error)) {
  80. data->setError(error, "Error getting attribute value");
  81. return 0;
  82. }
  83. return ret;
  84. }
  85. static int64_t HHVM_METHOD(Collator, getErrorCode) {
  86. FETCH_COL(data, this_, 0);
  87. return data->getErrorCode();
  88. }
  89. static String HHVM_METHOD(Collator, getErrorMessage) {
  90. FETCH_COL(data, this_, "");
  91. return data->getErrorMessage();
  92. }
  93. static String HHVM_METHOD(Collator, getLocale, int64_t type) {
  94. FETCH_COL(data, this_, "");
  95. data->clearError();
  96. UErrorCode error = U_ZERO_ERROR;
  97. auto loc = ucol_getLocaleByType(data->collator(), (ULocDataLocaleType)type,
  98. &error);
  99. if (U_FAILURE(error)) {
  100. data->setError(error, "Error getting locale by type");
  101. }
  102. return String(loc, CopyString);
  103. }
  104. static int64_t HHVM_METHOD(Collator, getStrength) {
  105. FETCH_COL(data, this_, false);
  106. return ucol_getStrength(data->collator());
  107. }
  108. static bool HHVM_METHOD(Collator, setAttribute, int64_t attr, int64_t val) {
  109. FETCH_COL(data, this_, false);
  110. data->clearError();
  111. UErrorCode error = U_ZERO_ERROR;
  112. ucol_setAttribute(data->collator(), (UColAttribute)attr,
  113. (UColAttributeValue)val, &error);
  114. if (U_FAILURE(error)) {
  115. data->setError(error, "Error setting attribute value");
  116. return false;
  117. }
  118. return true;
  119. }
  120. static Variant HHVM_METHOD(Collator, getSortKey, const String& val) {
  121. FETCH_COL(data, this_, false);
  122. UErrorCode error = U_ZERO_ERROR;
  123. icu::UnicodeString strval(u16(val, error));
  124. if (U_FAILURE(error)) {
  125. return false;
  126. }
  127. int sortkey_len = ucol_getSortKey(data->collator(),
  128. strval.getBuffer(), strval.length(),
  129. nullptr,
  130. 0);
  131. if (sortkey_len <= 0) {
  132. return false;
  133. }
  134. String ret(sortkey_len + 1, ReserveString);
  135. sortkey_len = ucol_getSortKey(data->collator(),
  136. strval.getBuffer(), strval.length(),
  137. (uint8_t*) ret.get()->mutableData(),
  138. ret.capacity() + 1);
  139. if (sortkey_len <= 0) {
  140. return false;
  141. }
  142. ret.setSize(sortkey_len);
  143. return ret;
  144. }
  145. static bool HHVM_METHOD(Collator, setStrength, int64_t strength) {
  146. FETCH_COL(data, this_, false);
  147. ucol_setStrength(data->collator(), (UCollationStrength)strength);
  148. return true;
  149. }
  // One entry of the index that sortWithSortKeys() sorts: a generated sort
  // key paired with the position of the array element it was generated from.
  struct _collator_sort_key_index {
    char* key;        // pointer to sort key
    ssize_t valPos;   // position of the original array element
  };
  using collator_sort_key_index_t = _collator_sort_key_index;
  /* Sort key byte buffer: initial size and growth step (1 MiB each) */
  static constexpr int32_t DEF_SORT_KEYS_BUF_SIZE = 1048576;
  static constexpr int32_t DEF_SORT_KEYS_BUF_INCREMENT = 1048576;

  /* Sort key index buffer: initial number of entries and growth step */
  static constexpr int32_t DEF_SORT_KEYS_INDX_BUF_SIZE = 512;
  static constexpr int32_t DEF_SORT_KEYS_INDX_BUF_INCREMENT = 64;
  160. static int collator_cmp_sort_keys(const void* p1, const void* p2, const void*) {
  161. char* key1 = ((collator_sort_key_index_t*)p1)->key;
  162. char* key2 = ((collator_sort_key_index_t*)p2)->key;
  163. return strcmp( key1, key2 );
  164. }
  165. static bool HHVM_METHOD(Collator, sortWithSortKeys, VRefParam arr) {
  166. FETCH_COL(data, this_, false);
  167. data->clearError();
  168. if (!arr.isArray()) {
  169. return true;
  170. }
  171. Array hash = arr.toArray();
  172. if (hash.size() == 0) {
  173. return true;
  174. }
  175. // Preallocate sort keys buffer
  176. size_t sortKeysOffset = 0;
  177. size_t sortKeysLength = DEF_SORT_KEYS_BUF_SIZE;
  178. char* sortKeys = (char*)smart_malloc(sortKeysLength);
  179. if (!sortKeys) {
  180. throw Exception("Out of memory");
  181. }
  182. SCOPE_EXIT{ smart_free(sortKeys); };
  183. // Preallocate index buffer
  184. size_t sortIndexPos = 0;
  185. size_t sortIndexLength = DEF_SORT_KEYS_INDX_BUF_SIZE;
  186. auto sortIndex = (collator_sort_key_index_t*)smart_malloc(
  187. sortIndexLength * sizeof(collator_sort_key_index_t));
  188. if (!sortIndex) {
  189. throw Exception("Out of memory");
  190. }
  191. SCOPE_EXIT{ smart_free(sortIndex); };
  192. // Translate input hash to sortable index
  193. auto pos_limit = hash->iter_end();
  194. for (ssize_t pos = hash->iter_begin(); pos != pos_limit;
  195. pos = hash->iter_advance(pos)) {
  196. Variant val(hash->getValue(pos));
  197. // Convert to UTF16
  198. icu::UnicodeString strval;
  199. if (val.isString()) {
  200. UErrorCode error = U_ZERO_ERROR;
  201. strval = u16(val.toString(), error);
  202. if (U_FAILURE(error)) {
  203. return false;
  204. }
  205. }
  206. // Generate sort key
  207. int sortkey_len =
  208. ucol_getSortKey(data->collator(),
  209. strval.getBuffer(), strval.length(),
  210. (uint8_t*)(sortKeys + sortKeysOffset),
  211. sortKeysLength - sortKeysOffset);
  212. // Check for key buffer overflow
  213. if (sortkey_len > (sortKeysLength - sortKeysOffset)) {
  214. int32_t inc = (sortkey_len > DEF_SORT_KEYS_BUF_INCREMENT)
  215. ? sortkey_len : DEF_SORT_KEYS_BUF_INCREMENT;
  216. sortKeysLength += inc;
  217. sortKeys = (char*)smart_realloc(sortKeys, sortKeysLength);
  218. if (!sortKeys) {
  219. throw Exception("Out of memory");
  220. }
  221. sortkey_len =
  222. ucol_getSortKey(data->collator(),
  223. strval.getBuffer(), strval.length(),
  224. (uint8_t*)(sortKeys + sortKeysOffset),
  225. sortKeysLength - sortKeysOffset);
  226. assert(sortkey_len <= (sortKeysLength - sortKeysOffset));
  227. }
  228. // Check for index buffer overflow
  229. if ((sortIndexPos + 1) > sortIndexLength) {
  230. sortIndexLength += DEF_SORT_KEYS_INDX_BUF_INCREMENT;
  231. sortIndex = (collator_sort_key_index_t*)smart_realloc(sortIndex,
  232. sortIndexLength * sizeof(collator_sort_key_index_t));
  233. if (!sortIndex) {
  234. throw Exception("Out of memory");
  235. }
  236. }
  237. // Initially store offset into buffer, update later to deal with reallocs
  238. sortIndex[sortIndexPos].key = (char*)sortKeysOffset;
  239. sortKeysOffset += sortkey_len;
  240. sortIndex[sortIndexPos].valPos = pos;
  241. ++sortIndexPos;
  242. }
  243. // Update keys to location in realloc'd buffer
  244. for (int i = 0; i < sortIndexPos; ++i) {
  245. sortIndex[i].key = sortKeys + (ptrdiff_t)sortIndex[i].key;
  246. }
  247. zend_qsort(sortIndex, sortIndexPos,
  248. sizeof(collator_sort_key_index_t),
  249. collator_cmp_sort_keys, nullptr);
  250. Array ret = Array::Create();
  251. for (int i = 0; i < sortIndexPos; ++i) {
  252. ret.append(hash->getValue(sortIndex[i].valPos));
  253. }
  254. arr = ret;
  255. return true;
  256. }
  257. static bool HHVM_METHOD(Collator, sort, VRefParam arr,
  258. int64_t sort_flag /* = Collator::SORT_REGULAR */) {
  259. FETCH_COL(data, this_, false);
  260. if (!arr.isArray()) {
  261. throw_expected_array_exception("Collator::sort");
  262. return false;
  263. }
  264. data->clearError();
  265. bool ret = collator_sort(arr, sort_flag, true, data->collator(), data);
  266. if (U_FAILURE(data->getErrorCode())) {
  267. return false;
  268. }
  269. return ret;
  270. }
  271. //////////////////////////////////////////////////////////////////////////////
  272. #define CONST_SORT(v) Native::registerClassConstant<KindOfInt64> \
  273. (s_Collator.get(), makeStaticString("SORT_" #v), SORT_##v);
  274. #define CONST_UCOL(v) Native::registerClassConstant<KindOfInt64> \
  275. (s_Collator.get(), makeStaticString(#v), UCOL_##v);
  276. const StaticString s_DEFAULT_VALUE("DEFAULT_VALUE");
  277. void IntlExtension::initCollator() {
  278. HHVM_ME(Collator, __construct);
  279. HHVM_ME(Collator, asort);
  280. HHVM_ME(Collator, compare);
  281. HHVM_ME(Collator, getAttribute);
  282. HHVM_ME(Collator, getErrorCode);
  283. HHVM_ME(Collator, getErrorMessage);
  284. HHVM_ME(Collator, getLocale);
  285. HHVM_ME(Collator, getSortKey);
  286. HHVM_ME(Collator, getStrength);
  287. HHVM_ME(Collator, setAttribute);
  288. HHVM_ME(Collator, setStrength);
  289. HHVM_ME(Collator, sortWithSortKeys);
  290. HHVM_ME(Collator, sort);
  291. CONST_SORT(REGULAR);
  292. CONST_SORT(STRING);
  293. CONST_SORT(NUMERIC);
  294. CONST_UCOL(FRENCH_COLLATION);
  295. CONST_UCOL(ALTERNATE_HANDLING);
  296. CONST_UCOL(CASE_FIRST);
  297. CONST_UCOL(CASE_LEVEL);
  298. CONST_UCOL(NORMALIZATION_MODE);
  299. CONST_UCOL(STRENGTH);
  300. CONST_UCOL(HIRAGANA_QUATERNARY_MODE);
  301. CONST_UCOL(NUMERIC_COLLATION);
  302. CONST_UCOL(PRIMARY);
  303. CONST_UCOL(SECONDARY);
  304. CONST_UCOL(TERTIARY);
  305. CONST_UCOL(DEFAULT_STRENGTH);
  306. CONST_UCOL(QUATERNARY);
  307. CONST_UCOL(IDENTICAL);
  308. CONST_UCOL(OFF);
  309. CONST_UCOL(ON);
  310. CONST_UCOL(SHIFTED);
  311. CONST_UCOL(NON_IGNORABLE);
  312. CONST_UCOL(LOWER_FIRST);
  313. CONST_UCOL(UPPER_FIRST);
  314. Native::registerClassConstant<KindOfInt64>
  315. (s_Collator.get(), s_DEFAULT_VALUE.get(), UCOL_DEFAULT);
  316. Native::registerNativeDataInfo<Collator>(s_Collator.get());
  317. loadSystemlib("icu_collator");
  318. }
  319. //////////////////////////////////////////////////////////////////////////////
  320. }} // namespace HPHP::Intl