PageRenderTime 58ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/d/druntime/rt/aApplyR.d

https://bitbucket.org/fearog/gdc-nacl
D | 958 lines | 797 code | 110 blank | 51 comment | 310 complexity | 379b6464c0a98ba0172b18be2d764424 MD5 | raw file
Possible License(s): GPL-2.0, AGPL-1.0
  1. /**
  2. * This code handles decoding UTF strings for foreach_reverse loops. There are
  3. * 6 combinations of conversions between char, wchar, and dchar, and 2 of each
  4. * of those.
  5. *
  6. * Copyright: Copyright Digital Mars 2004 - 2010.
  7. * License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
  8. * Authors: Walter Bright, Sean Kelly
  9. */
  10. /* Copyright Digital Mars 2004 - 2010.
  11. * Distributed under the Boost Software License, Version 1.0.
  12. * (See accompanying file LICENSE_1_0.txt or copy at
  13. * http://www.boost.org/LICENSE_1_0.txt)
  14. */
  15. module rt.aApplyR;
  16. /* This code handles decoding UTF strings for foreach_reverse loops.
  17. * There are 6 combinations of conversions between char, wchar,
  18. * and dchar, and 2 of each of those.
  19. */
  20. private import rt.util.utf;
  21. /**********************************************/
  22. /* 1 argument versions */
  23. // dg is D, but _aApplyRcd() is C
  24. extern (D) alias int delegate(void *) dg_t;
  25. extern (C) int _aApplyRcd1(in char[] aa, dg_t dg)
  26. { int result;
  27. debug(apply) printf("_aApplyRcd1(), len = %d\n", aa.length);
  28. for (size_t i = aa.length; i != 0; )
  29. { dchar d;
  30. i--;
  31. d = aa[i];
  32. if (d & 0x80)
  33. { char c = cast(char)d;
  34. uint j;
  35. uint m = 0x3F;
  36. d = 0;
  37. while ((c & 0xC0) != 0xC0)
  38. { if (i == 0)
  39. onUnicodeError("Invalid UTF-8 sequence", 0);
  40. i--;
  41. d |= (c & 0x3F) << j;
  42. j += 6;
  43. m >>= 1;
  44. c = aa[i];
  45. }
  46. d |= (c & m) << j;
  47. }
  48. result = dg(cast(void *)&d);
  49. if (result)
  50. break;
  51. }
  52. return result;
  53. }
  54. unittest
  55. {
  56. debug(apply) printf("_aApplyRcd1.unittest\n");
  57. auto s = "hello"c[];
  58. int i;
  59. foreach_reverse(dchar d; s)
  60. {
  61. switch (i)
  62. {
  63. case 0: assert(d == 'o'); break;
  64. case 1: assert(d == 'l'); break;
  65. case 2: assert(d == 'l'); break;
  66. case 3: assert(d == 'e'); break;
  67. case 4: assert(d == 'h'); break;
  68. default: assert(0);
  69. }
  70. i++;
  71. }
  72. assert(i == 5);
  73. s = "a\u1234\U00100456b";
  74. i = 0;
  75. foreach_reverse(dchar d; s)
  76. {
  77. //printf("i = %d, d = %x\n", i, d);
  78. switch (i)
  79. {
  80. case 0: assert(d == 'b'); break;
  81. case 1: assert(d == '\U00100456'); break;
  82. case 2: assert(d == '\u1234'); break;
  83. case 3: assert(d == 'a'); break;
  84. default: assert(0);
  85. }
  86. i++;
  87. }
  88. assert(i == 4);
  89. }
  90. /*****************************/
  91. extern (C) int _aApplyRwd1(in wchar[] aa, dg_t dg)
  92. { int result;
  93. debug(apply) printf("_aApplyRwd1(), len = %d\n", aa.length);
  94. for (size_t i = aa.length; i != 0; )
  95. { dchar d;
  96. i--;
  97. d = aa[i];
  98. if (d >= 0xDC00 && d <= 0xDFFF)
  99. { if (i == 0)
  100. onUnicodeError("Invalid UTF-16 sequence", 0);
  101. i--;
  102. d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00);
  103. }
  104. result = dg(cast(void *)&d);
  105. if (result)
  106. break;
  107. }
  108. return result;
  109. }
  110. unittest
  111. {
  112. debug(apply) printf("_aApplyRwd1.unittest\n");
  113. auto s = "hello"w[];
  114. int i;
  115. foreach_reverse(dchar d; s)
  116. {
  117. switch (i)
  118. {
  119. case 0: assert(d == 'o'); break;
  120. case 1: assert(d == 'l'); break;
  121. case 2: assert(d == 'l'); break;
  122. case 3: assert(d == 'e'); break;
  123. case 4: assert(d == 'h'); break;
  124. default: assert(0);
  125. }
  126. i++;
  127. }
  128. assert(i == 5);
  129. s = "a\u1234\U00100456b";
  130. i = 0;
  131. foreach_reverse(dchar d; s)
  132. {
  133. //printf("i = %d, d = %x\n", i, d);
  134. switch (i)
  135. {
  136. case 0: assert(d == 'b'); break;
  137. case 1: assert(d == '\U00100456'); break;
  138. case 2: assert(d == '\u1234'); break;
  139. case 3: assert(d == 'a'); break;
  140. default: assert(0);
  141. }
  142. i++;
  143. }
  144. assert(i == 4);
  145. }
  146. /*****************************/
  147. extern (C) int _aApplyRcw1(in char[] aa, dg_t dg)
  148. { int result;
  149. debug(apply) printf("_aApplyRcw1(), len = %d\n", aa.length);
  150. for (size_t i = aa.length; i != 0; )
  151. { dchar d;
  152. wchar w;
  153. i--;
  154. w = aa[i];
  155. if (w & 0x80)
  156. { char c = cast(char)w;
  157. uint j;
  158. uint m = 0x3F;
  159. d = 0;
  160. while ((c & 0xC0) != 0xC0)
  161. { if (i == 0)
  162. onUnicodeError("Invalid UTF-8 sequence", 0);
  163. i--;
  164. d |= (c & 0x3F) << j;
  165. j += 6;
  166. m >>= 1;
  167. c = aa[i];
  168. }
  169. d |= (c & m) << j;
  170. if (d <= 0xFFFF)
  171. w = cast(wchar) d;
  172. else
  173. {
  174. w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
  175. result = dg(cast(void *)&w);
  176. if (result)
  177. break;
  178. w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
  179. }
  180. }
  181. result = dg(cast(void *)&w);
  182. if (result)
  183. break;
  184. }
  185. return result;
  186. }
  187. unittest
  188. {
  189. debug(apply) printf("_aApplyRcw1.unittest\n");
  190. auto s = "hello"c[];
  191. int i;
  192. foreach_reverse(wchar d; s)
  193. {
  194. switch (i)
  195. {
  196. case 0: assert(d == 'o'); break;
  197. case 1: assert(d == 'l'); break;
  198. case 2: assert(d == 'l'); break;
  199. case 3: assert(d == 'e'); break;
  200. case 4: assert(d == 'h'); break;
  201. default: assert(0);
  202. }
  203. i++;
  204. }
  205. assert(i == 5);
  206. s = "a\u1234\U00100456b";
  207. i = 0;
  208. foreach_reverse(wchar d; s)
  209. {
  210. //printf("i = %d, d = %x\n", i, d);
  211. switch (i)
  212. {
  213. case 0: assert(d == 'b'); break;
  214. case 1: assert(d == 0xDBC1); break;
  215. case 2: assert(d == 0xDC56); break;
  216. case 3: assert(d == 0x1234); break;
  217. case 4: assert(d == 'a'); break;
  218. default: assert(0);
  219. }
  220. i++;
  221. }
  222. assert(i == 5);
  223. }
  224. /*****************************/
  225. extern (C) int _aApplyRwc1(in wchar[] aa, dg_t dg)
  226. { int result;
  227. debug(apply) printf("_aApplyRwc1(), len = %d\n", aa.length);
  228. for (size_t i = aa.length; i != 0; )
  229. { dchar d;
  230. char c;
  231. i--;
  232. d = aa[i];
  233. if (d >= 0xDC00 && d <= 0xDFFF)
  234. { if (i == 0)
  235. onUnicodeError("Invalid UTF-16 sequence", 0);
  236. i--;
  237. d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00);
  238. }
  239. if (d & ~0x7F)
  240. {
  241. char[4] buf;
  242. auto b = toUTF8(buf, d);
  243. foreach (char c2; b)
  244. {
  245. result = dg(cast(void *)&c2);
  246. if (result)
  247. return result;
  248. }
  249. continue;
  250. }
  251. c = cast(char)d;
  252. result = dg(cast(void *)&c);
  253. if (result)
  254. break;
  255. }
  256. return result;
  257. }
  258. unittest
  259. {
  260. debug(apply) printf("_aApplyRwc1.unittest\n");
  261. auto s = "hello"w[];
  262. int i;
  263. foreach_reverse(char d; s)
  264. {
  265. switch (i)
  266. {
  267. case 0: assert(d == 'o'); break;
  268. case 1: assert(d == 'l'); break;
  269. case 2: assert(d == 'l'); break;
  270. case 3: assert(d == 'e'); break;
  271. case 4: assert(d == 'h'); break;
  272. default: assert(0);
  273. }
  274. i++;
  275. }
  276. assert(i == 5);
  277. s = "a\u1234\U00100456b";
  278. i = 0;
  279. foreach_reverse(char d; s)
  280. {
  281. //printf("i = %d, d = %x\n", i, d);
  282. switch (i)
  283. {
  284. case 0: assert(d == 'b'); break;
  285. case 1: assert(d == 0xF4); break;
  286. case 2: assert(d == 0x80); break;
  287. case 3: assert(d == 0x91); break;
  288. case 4: assert(d == 0x96); break;
  289. case 5: assert(d == 0xE1); break;
  290. case 6: assert(d == 0x88); break;
  291. case 7: assert(d == 0xB4); break;
  292. case 8: assert(d == 'a'); break;
  293. default: assert(0);
  294. }
  295. i++;
  296. }
  297. assert(i == 9);
  298. }
  299. /*****************************/
  300. extern (C) int _aApplyRdc1(in dchar[] aa, dg_t dg)
  301. { int result;
  302. debug(apply) printf("_aApplyRdc1(), len = %d\n", aa.length);
  303. for (size_t i = aa.length; i != 0;)
  304. { dchar d = aa[--i];
  305. char c;
  306. if (d & ~0x7F)
  307. {
  308. char[4] buf;
  309. auto b = toUTF8(buf, d);
  310. foreach (char c2; b)
  311. {
  312. result = dg(cast(void *)&c2);
  313. if (result)
  314. return result;
  315. }
  316. continue;
  317. }
  318. else
  319. {
  320. c = cast(char)d;
  321. }
  322. result = dg(cast(void *)&c);
  323. if (result)
  324. break;
  325. }
  326. return result;
  327. }
  328. unittest
  329. {
  330. debug(apply) printf("_aApplyRdc1.unittest\n");
  331. auto s = "hello"d[];
  332. int i;
  333. foreach_reverse(char d; s)
  334. {
  335. switch (i)
  336. {
  337. case 0: assert(d == 'o'); break;
  338. case 1: assert(d == 'l'); break;
  339. case 2: assert(d == 'l'); break;
  340. case 3: assert(d == 'e'); break;
  341. case 4: assert(d == 'h'); break;
  342. default: assert(0);
  343. }
  344. i++;
  345. }
  346. assert(i == 5);
  347. s = "a\u1234\U00100456b";
  348. i = 0;
  349. foreach_reverse(char d; s)
  350. {
  351. //printf("i = %d, d = %x\n", i, d);
  352. switch (i)
  353. {
  354. case 0: assert(d == 'b'); break;
  355. case 1: assert(d == 0xF4); break;
  356. case 2: assert(d == 0x80); break;
  357. case 3: assert(d == 0x91); break;
  358. case 4: assert(d == 0x96); break;
  359. case 5: assert(d == 0xE1); break;
  360. case 6: assert(d == 0x88); break;
  361. case 7: assert(d == 0xB4); break;
  362. case 8: assert(d == 'a'); break;
  363. default: assert(0);
  364. }
  365. i++;
  366. }
  367. assert(i == 9);
  368. }
  369. /*****************************/
  370. extern (C) int _aApplyRdw1(in dchar[] aa, dg_t dg)
  371. { int result;
  372. debug(apply) printf("_aApplyRdw1(), len = %d\n", aa.length);
  373. for (size_t i = aa.length; i != 0; )
  374. { dchar d = aa[--i];
  375. wchar w;
  376. if (d <= 0xFFFF)
  377. w = cast(wchar) d;
  378. else
  379. {
  380. w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
  381. result = dg(cast(void *)&w);
  382. if (result)
  383. break;
  384. w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
  385. }
  386. result = dg(cast(void *)&w);
  387. if (result)
  388. break;
  389. }
  390. return result;
  391. }
  392. unittest
  393. {
  394. debug(apply) printf("_aApplyRdw1.unittest\n");
  395. auto s = "hello"d[];
  396. int i;
  397. foreach_reverse(wchar d; s)
  398. {
  399. switch (i)
  400. {
  401. case 0: assert(d == 'o'); break;
  402. case 1: assert(d == 'l'); break;
  403. case 2: assert(d == 'l'); break;
  404. case 3: assert(d == 'e'); break;
  405. case 4: assert(d == 'h'); break;
  406. default: assert(0);
  407. }
  408. i++;
  409. }
  410. assert(i == 5);
  411. s = "a\u1234\U00100456b";
  412. i = 0;
  413. foreach_reverse(wchar d; s)
  414. {
  415. //printf("i = %d, d = %x\n", i, d);
  416. switch (i)
  417. {
  418. case 0: assert(d == 'b'); break;
  419. case 1: assert(d == 0xDBC1); break;
  420. case 2: assert(d == 0xDC56); break;
  421. case 3: assert(d == 0x1234); break;
  422. case 4: assert(d == 'a'); break;
  423. default: assert(0);
  424. }
  425. i++;
  426. }
  427. assert(i == 5);
  428. }
  429. /****************************************************************************/
  430. /* 2 argument versions */
  431. // dg is D, but _aApplyRcd2() is C
  432. extern (D) alias int delegate(void *, void *) dg2_t;
  433. extern (C) int _aApplyRcd2(in char[] aa, dg2_t dg)
  434. { int result;
  435. size_t i;
  436. size_t len = aa.length;
  437. debug(apply) printf("_aApplyRcd2(), len = %d\n", len);
  438. for (i = len; i != 0; )
  439. { dchar d;
  440. i--;
  441. d = aa[i];
  442. if (d & 0x80)
  443. { char c = cast(char)d;
  444. uint j;
  445. uint m = 0x3F;
  446. d = 0;
  447. while ((c & 0xC0) != 0xC0)
  448. { if (i == 0)
  449. onUnicodeError("Invalid UTF-8 sequence", 0);
  450. i--;
  451. d |= (c & 0x3F) << j;
  452. j += 6;
  453. m >>= 1;
  454. c = aa[i];
  455. }
  456. d |= (c & m) << j;
  457. }
  458. result = dg(&i, cast(void *)&d);
  459. if (result)
  460. break;
  461. }
  462. return result;
  463. }
  464. unittest
  465. {
  466. debug(apply) printf("_aApplyRcd2.unittest\n");
  467. auto s = "hello"c[];
  468. int i;
  469. foreach_reverse(k, dchar d; s)
  470. {
  471. assert(k == 4 - i);
  472. switch (i)
  473. {
  474. case 0: assert(d == 'o'); break;
  475. case 1: assert(d == 'l'); break;
  476. case 2: assert(d == 'l'); break;
  477. case 3: assert(d == 'e'); break;
  478. case 4: assert(d == 'h'); break;
  479. default: assert(0);
  480. }
  481. i++;
  482. }
  483. assert(i == 5);
  484. s = "a\u1234\U00100456b";
  485. i = 0;
  486. foreach_reverse(k, dchar d; s)
  487. {
  488. //printf("i = %d, k = %d, d = %x\n", i, k, d);
  489. switch (i)
  490. {
  491. case 0: assert(d == 'b'); assert(k == 8); break;
  492. case 1: assert(d == '\U00100456'); assert(k == 4); break;
  493. case 2: assert(d == '\u1234'); assert(k == 1); break;
  494. case 3: assert(d == 'a'); assert(k == 0); break;
  495. default: assert(0);
  496. }
  497. i++;
  498. }
  499. assert(i == 4);
  500. }
  501. /*****************************/
  502. extern (C) int _aApplyRwd2(in wchar[] aa, dg2_t dg)
  503. { int result;
  504. debug(apply) printf("_aApplyRwd2(), len = %d\n", aa.length);
  505. for (size_t i = aa.length; i != 0; )
  506. { dchar d;
  507. i--;
  508. d = aa[i];
  509. if (d >= 0xDC00 && d <= 0xDFFF)
  510. { if (i == 0)
  511. onUnicodeError("Invalid UTF-16 sequence", 0);
  512. i--;
  513. d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00);
  514. }
  515. result = dg(&i, cast(void *)&d);
  516. if (result)
  517. break;
  518. }
  519. return result;
  520. }
  521. unittest
  522. {
  523. debug(apply) printf("_aApplyRwd2.unittest\n");
  524. auto s = "hello"w[];
  525. int i;
  526. foreach_reverse(k, dchar d; s)
  527. {
  528. //printf("i = %d, k = %d, d = %x\n", i, k, d);
  529. assert(k == 4 - i);
  530. switch (i)
  531. {
  532. case 0: assert(d == 'o'); break;
  533. case 1: assert(d == 'l'); break;
  534. case 2: assert(d == 'l'); break;
  535. case 3: assert(d == 'e'); break;
  536. case 4: assert(d == 'h'); break;
  537. default: assert(0);
  538. }
  539. i++;
  540. }
  541. assert(i == 5);
  542. s = "a\u1234\U00100456b";
  543. i = 0;
  544. foreach_reverse(k, dchar d; s)
  545. {
  546. //printf("i = %d, k = %d, d = %x\n", i, k, d);
  547. switch (i)
  548. {
  549. case 0: assert(k == 4); assert(d == 'b'); break;
  550. case 1: assert(k == 2); assert(d == '\U00100456'); break;
  551. case 2: assert(k == 1); assert(d == '\u1234'); break;
  552. case 3: assert(k == 0); assert(d == 'a'); break;
  553. default: assert(0);
  554. }
  555. i++;
  556. }
  557. assert(i == 4);
  558. }
  559. /*****************************/
  560. extern (C) int _aApplyRcw2(in char[] aa, dg2_t dg)
  561. { int result;
  562. debug(apply) printf("_aApplyRcw2(), len = %d\n", aa.length);
  563. for (size_t i = aa.length; i != 0; )
  564. { dchar d;
  565. wchar w;
  566. i--;
  567. w = aa[i];
  568. if (w & 0x80)
  569. { char c = cast(char)w;
  570. uint j;
  571. uint m = 0x3F;
  572. d = 0;
  573. while ((c & 0xC0) != 0xC0)
  574. { if (i == 0)
  575. onUnicodeError("Invalid UTF-8 sequence", 0);
  576. i--;
  577. d |= (c & 0x3F) << j;
  578. j += 6;
  579. m >>= 1;
  580. c = aa[i];
  581. }
  582. d |= (c & m) << j;
  583. if (d <= 0xFFFF)
  584. w = cast(wchar) d;
  585. else
  586. {
  587. w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
  588. result = dg(&i, cast(void *)&w);
  589. if (result)
  590. break;
  591. w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
  592. }
  593. }
  594. result = dg(&i, cast(void *)&w);
  595. if (result)
  596. break;
  597. }
  598. return result;
  599. }
  600. unittest
  601. {
  602. debug(apply) printf("_aApplyRcw2.unittest\n");
  603. auto s = "hello"c[];
  604. int i;
  605. foreach_reverse(k, wchar d; s)
  606. {
  607. //printf("i = %d, k = %d, d = %x\n", i, k, d);
  608. assert(k == 4 - i);
  609. switch (i)
  610. {
  611. case 0: assert(d == 'o'); break;
  612. case 1: assert(d == 'l'); break;
  613. case 2: assert(d == 'l'); break;
  614. case 3: assert(d == 'e'); break;
  615. case 4: assert(d == 'h'); break;
  616. default: assert(0);
  617. }
  618. i++;
  619. }
  620. assert(i == 5);
  621. s = "a\u1234\U00100456b";
  622. i = 0;
  623. foreach_reverse(k, wchar d; s)
  624. {
  625. //printf("i = %d, k = %d, d = %x\n", i, k, d);
  626. switch (i)
  627. {
  628. case 0: assert(k == 8); assert(d == 'b'); break;
  629. case 1: assert(k == 4); assert(d == 0xDBC1); break;
  630. case 2: assert(k == 4); assert(d == 0xDC56); break;
  631. case 3: assert(k == 1); assert(d == 0x1234); break;
  632. case 4: assert(k == 0); assert(d == 'a'); break;
  633. default: assert(0);
  634. }
  635. i++;
  636. }
  637. assert(i == 5);
  638. }
  639. /*****************************/
  640. extern (C) int _aApplyRwc2(in wchar[] aa, dg2_t dg)
  641. { int result;
  642. debug(apply) printf("_aApplyRwc2(), len = %d\n", aa.length);
  643. for (size_t i = aa.length; i != 0; )
  644. { dchar d;
  645. char c;
  646. i--;
  647. d = aa[i];
  648. if (d >= 0xDC00 && d <= 0xDFFF)
  649. { if (i == 0)
  650. onUnicodeError("Invalid UTF-16 sequence", 0);
  651. i--;
  652. d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00);
  653. }
  654. if (d & ~0x7F)
  655. {
  656. char[4] buf;
  657. auto b = toUTF8(buf, d);
  658. foreach (char c2; b)
  659. {
  660. result = dg(&i, cast(void *)&c2);
  661. if (result)
  662. return result;
  663. }
  664. continue;
  665. }
  666. c = cast(char)d;
  667. result = dg(&i, cast(void *)&c);
  668. if (result)
  669. break;
  670. }
  671. return result;
  672. }
  673. unittest
  674. {
  675. debug(apply) printf("_aApplyRwc2.unittest\n");
  676. auto s = "hello"w[];
  677. int i;
  678. foreach_reverse(k, char d; s)
  679. {
  680. //printf("i = %d, k = %d, d = %x\n", i, k, d);
  681. assert(k == 4 - i);
  682. switch (i)
  683. {
  684. case 0: assert(d == 'o'); break;
  685. case 1: assert(d == 'l'); break;
  686. case 2: assert(d == 'l'); break;
  687. case 3: assert(d == 'e'); break;
  688. case 4: assert(d == 'h'); break;
  689. default: assert(0);
  690. }
  691. i++;
  692. }
  693. assert(i == 5);
  694. s = "a\u1234\U00100456b";
  695. i = 0;
  696. foreach_reverse(k, char d; s)
  697. {
  698. //printf("i = %d, k = %d, d = %x\n", i, k, d);
  699. switch (i)
  700. {
  701. case 0: assert(k == 4); assert(d == 'b'); break;
  702. case 1: assert(k == 2); assert(d == 0xF4); break;
  703. case 2: assert(k == 2); assert(d == 0x80); break;
  704. case 3: assert(k == 2); assert(d == 0x91); break;
  705. case 4: assert(k == 2); assert(d == 0x96); break;
  706. case 5: assert(k == 1); assert(d == 0xE1); break;
  707. case 6: assert(k == 1); assert(d == 0x88); break;
  708. case 7: assert(k == 1); assert(d == 0xB4); break;
  709. case 8: assert(k == 0); assert(d == 'a'); break;
  710. default: assert(0);
  711. }
  712. i++;
  713. }
  714. assert(i == 9);
  715. }
  716. /*****************************/
  717. extern (C) int _aApplyRdc2(in dchar[] aa, dg2_t dg)
  718. { int result;
  719. debug(apply) printf("_aApplyRdc2(), len = %d\n", aa.length);
  720. for (size_t i = aa.length; i != 0; )
  721. { dchar d = aa[--i];
  722. char c;
  723. if (d & ~0x7F)
  724. {
  725. char[4] buf;
  726. auto b = toUTF8(buf, d);
  727. foreach (char c2; b)
  728. {
  729. result = dg(&i, cast(void *)&c2);
  730. if (result)
  731. return result;
  732. }
  733. continue;
  734. }
  735. else
  736. { c = cast(char)d;
  737. }
  738. result = dg(&i, cast(void *)&c);
  739. if (result)
  740. break;
  741. }
  742. return result;
  743. }
  744. unittest
  745. {
  746. debug(apply) printf("_aApplyRdc2.unittest\n");
  747. auto s = "hello"d[];
  748. int i;
  749. foreach_reverse(k, char d; s)
  750. {
  751. //printf("i = %d, k = %d, d = %x\n", i, k, d);
  752. assert(k == 4 - i);
  753. switch (i)
  754. {
  755. case 0: assert(d == 'o'); break;
  756. case 1: assert(d == 'l'); break;
  757. case 2: assert(d == 'l'); break;
  758. case 3: assert(d == 'e'); break;
  759. case 4: assert(d == 'h'); break;
  760. default: assert(0);
  761. }
  762. i++;
  763. }
  764. assert(i == 5);
  765. s = "a\u1234\U00100456b";
  766. i = 0;
  767. foreach_reverse(k, char d; s)
  768. {
  769. //printf("i = %d, k = %d, d = %x\n", i, k, d);
  770. switch (i)
  771. {
  772. case 0: assert(k == 3); assert(d == 'b'); break;
  773. case 1: assert(k == 2); assert(d == 0xF4); break;
  774. case 2: assert(k == 2); assert(d == 0x80); break;
  775. case 3: assert(k == 2); assert(d == 0x91); break;
  776. case 4: assert(k == 2); assert(d == 0x96); break;
  777. case 5: assert(k == 1); assert(d == 0xE1); break;
  778. case 6: assert(k == 1); assert(d == 0x88); break;
  779. case 7: assert(k == 1); assert(d == 0xB4); break;
  780. case 8: assert(k == 0); assert(d == 'a'); break;
  781. default: assert(0);
  782. }
  783. i++;
  784. }
  785. assert(i == 9);
  786. }
  787. /*****************************/
  788. extern (C) int _aApplyRdw2(in dchar[] aa, dg2_t dg)
  789. { int result;
  790. debug(apply) printf("_aApplyRdw2(), len = %d\n", aa.length);
  791. for (size_t i = aa.length; i != 0; )
  792. { dchar d = aa[--i];
  793. wchar w;
  794. if (d <= 0xFFFF)
  795. w = cast(wchar) d;
  796. else
  797. {
  798. w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
  799. result = dg(&i, cast(void *)&w);
  800. if (result)
  801. break;
  802. w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
  803. }
  804. result = dg(&i, cast(void *)&w);
  805. if (result)
  806. break;
  807. }
  808. return result;
  809. }
  810. unittest
  811. {
  812. debug(apply) printf("_aApplyRdw2.unittest\n");
  813. auto s = "hello"d[];
  814. int i;
  815. foreach_reverse(k, wchar d; s)
  816. {
  817. //printf("i = %d, k = %d, d = %x\n", i, k, d);
  818. assert(k == 4 - i);
  819. switch (i)
  820. {
  821. case 0: assert(d == 'o'); break;
  822. case 1: assert(d == 'l'); break;
  823. case 2: assert(d == 'l'); break;
  824. case 3: assert(d == 'e'); break;
  825. case 4: assert(d == 'h'); break;
  826. default: assert(0);
  827. }
  828. i++;
  829. }
  830. assert(i == 5);
  831. s = "a\u1234\U00100456b";
  832. i = 0;
  833. foreach_reverse(k, wchar d; s)
  834. {
  835. //printf("i = %d, k = %d, d = %x\n", i, k, d);
  836. switch (i)
  837. {
  838. case 0: assert(k == 3); assert(d == 'b'); break;
  839. case 1: assert(k == 2); assert(d == 0xDBC1); break;
  840. case 2: assert(k == 2); assert(d == 0xDC56); break;
  841. case 3: assert(k == 1); assert(d == 0x1234); break;
  842. case 4: assert(k == 0); assert(d == 'a'); break;
  843. default: assert(0);
  844. }
  845. i++;
  846. }
  847. assert(i == 5);
  848. }