PageRenderTime 47ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/external/icu4c/samples/ucnv/convsamp.cpp

https://gitlab.com/brian0218/rk3066_r-box_android4.2.2_sdk
C++ | 1106 lines | 701 code | 230 blank | 175 comment | 74 complexity | 578d147e19683e1aba30faa9c1d45f73 MD5 | raw file
  1. /**************************************************************************
  2. *
  3. * Copyright (C) 2000-2011, International Business Machines
  4. * Corporation and others. All Rights Reserved.
  5. *
  6. ***************************************************************************
  7. * file name: convsamp.c
  8. * encoding: ASCII (7-bit)
  9. *
  10. * created on: 2000may30
  11. * created by: Steven R. Loomis
  12. *
  13. * Sample code for the ICU conversion routines.
  14. *
  15. * Note: Nothing special is needed to build this sample. Link with
  16. * the icu UC and icu I18N libraries.
  17. *
  18. * I use 'assert' for error checking, you probably will want
  19. * something more flexible. '***BEGIN SAMPLE***' and
  20. * '***END SAMPLE***' mark pieces suitable for stand alone
  21. * code snippets.
  22. *
  23. *
  24. * Each test can define its own BUFFERSIZE
  25. *
  26. */
  27. #define DEBUG_TMI 0 /* define to 1 to enable Too Much Information */
  28. #include <stdio.h>
  29. #include <ctype.h> /* for isspace, etc. */
  30. #include <assert.h>
  31. #include <string.h>
  32. #include <stdlib.h> /* malloc */
  33. #include "unicode/utypes.h" /* Basic ICU data types */
  34. #include "unicode/ucnv.h" /* C Converter API */
  35. #include "unicode/ustring.h" /* some more string fcns*/
  36. #include "unicode/uchar.h" /* char names */
  37. #include "unicode/uloc.h"
  38. #include "unicode/unistr.h"
  39. #include "flagcb.h"
  40. /* Some utility functions */
  41. static const UChar kNone[] = { 0x0000 };
  42. #define U_ASSERT(x) { if(U_FAILURE(x)) {fflush(stdout);fflush(stderr); fprintf(stderr, #x " == %s\n", u_errorName(x)); assert(U_SUCCESS(x)); }}
  43. /* Print a UChar if possible, in seven characters. */
  44. void prettyPrintUChar(UChar c)
  45. {
  46. if( (c <= 0x007F) &&
  47. (isgraph(c)) ) {
  48. printf(" '%c' ", (char)(0x00FF&c));
  49. } else if ( c > 0x007F ) {
  50. char buf[1000];
  51. UErrorCode status = U_ZERO_ERROR;
  52. int32_t o;
  53. o = u_charName(c, U_UNICODE_CHAR_NAME, buf, 1000, &status);
  54. if(U_SUCCESS(status) && (o>0) ) {
  55. buf[6] = 0;
  56. printf("%7s", buf);
  57. } else {
  58. o = u_charName(c, U_UNICODE_10_CHAR_NAME, buf, 1000, &status);
  59. if(U_SUCCESS(status) && (o>0)) {
  60. buf[5] = 0;
  61. printf("~%6s", buf);
  62. }
  63. else {
  64. printf(" ??????");
  65. }
  66. }
  67. } else {
  68. switch((char)(c & 0x007F)) {
  69. case ' ':
  70. printf(" ' ' ");
  71. break;
  72. case '\t':
  73. printf(" \\t ");
  74. break;
  75. case '\n':
  76. printf(" \\n ");
  77. break;
  78. default:
  79. printf(" _ ");
  80. break;
  81. }
  82. }
  83. }
  84. void printUChars(const char *name = "?",
  85. const UChar *uch = kNone,
  86. int32_t len = -1 )
  87. {
  88. int32_t i;
  89. if( (len == -1) && (uch) ) {
  90. len = u_strlen(uch);
  91. }
  92. printf("%5s: ", name);
  93. for( i = 0; i <len; i++) {
  94. printf("%-6d ", i);
  95. }
  96. printf("\n");
  97. printf("%5s: ", "uni");
  98. for( i = 0; i <len; i++) {
  99. printf("\\u%04X ", (int)uch[i]);
  100. }
  101. printf("\n");
  102. printf("%5s:", "ch");
  103. for( i = 0; i <len; i++) {
  104. prettyPrintUChar(uch[i]);
  105. }
  106. printf("\n");
  107. }
  /*
   * Dump a byte string as three aligned rows: index, \xXX value and the
   * character itself (quoted) when it is graphic, a blank otherwise.
   *
   * name - row label for the index row.
   * uch  - bytes to print.
   * len  - number of bytes; -1 means "measure with strlen()" when uch is
   *        non-NULL.
   */
  void printBytes(const char *name = "?",
                  const char *uch  = "",
                  int32_t     len  = -1)
  {
      if (uch && (len == -1)) {
          len = (int32_t)strlen(uch);
      }

      /* Row 1: positions. */
      printf("%5s: ", name);
      for (int32_t pos = 0; pos < len; ++pos) {
          printf("%-4d ", pos);
      }
      printf("\n");

      /* Row 2: byte values, masked so a signed char prints as two hex digits. */
      printf("%5s: ", "uni");
      for (int32_t pos = 0; pos < len; ++pos) {
          printf("\\x%02X ", 0x00FF & (int)uch[pos]);
      }
      printf("\n");

      /* Row 3: readable form. */
      printf("%5s:", "ch");
      for (int32_t pos = 0; pos < len; ++pos) {
          const int byteValue = 0x00FF & (int)uch[pos];
          if (!isgraph(byteValue)) {
              printf(" ");
          } else {
              printf(" '%c' ", (char)uch[pos]);
          }
      }
      printf("\n");
  }
  136. void printUChar(UChar32 ch32)
  137. {
  138. if(ch32 > 0xFFFF) {
  139. printf("ch: U+%06X\n", ch32);
  140. }
  141. else {
  142. UChar ch = (UChar)ch32;
  143. printUChars("C", &ch, 1);
  144. }
  145. }
  146. /*******************************************************************
  147. Very simple C sample to convert the word 'Moscow' in Russian in Unicode,
  148. followed by an exclamation mark (!) into the KOI8-R Russian code page.
  149. This example first creates a UChar String out of the Unicode chars.
  150. targetSize must be set to the amount of space available in the target
  151. buffer. After fromUChars is called,
  152. len will contain the number of bytes in target[] which were
  153. used in the resulting codepage. In this case, there is a 1:1 mapping
  154. between the input and output characters. The exclamation mark has the
  155. same value in both KOI8-R and Unicode.
  156. src: 0 1 2 3 4 5 6
  157. uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
  158. ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL '!'
  159. targ: 0 1 2 3 4 5 6
  160. uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
  161. ch: '!'
  162. Converting FROM unicode
  163. to koi8-r.
  164. You must call ucnv_close to clean up the memory used by the
  165. converter.
  166. 'len' returns the number of OUTPUT bytes resulting from the
  167. conversion.
  168. */
  169. UErrorCode convsample_02()
  170. {
  171. printf("\n\n==============================================\n"
  172. "Sample 02: C: simple Unicode -> koi8-r conversion\n");
  173. // **************************** START SAMPLE *******************
  174. // "cat<cat>OK"
  175. UChar source[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
  176. 0x0430, 0x0021, 0x0000 };
  177. char target[100];
  178. UErrorCode status = U_ZERO_ERROR;
  179. UConverter *conv;
  180. int32_t len;
  181. // set up the converter
  182. conv = ucnv_open("koi8-r", &status);
  183. assert(U_SUCCESS(status));
  184. // convert to koi8-r
  185. len = ucnv_fromUChars(conv, target, 100, source, -1, &status);
  186. assert(U_SUCCESS(status));
  187. // close the converter
  188. ucnv_close(conv);
  189. // ***************************** END SAMPLE ********************
  190. // Print it out
  191. printUChars("src", source);
  192. printf("\n");
  193. printBytes("targ", target, len);
  194. return U_ZERO_ERROR;
  195. }
  196. UErrorCode convsample_03()
  197. {
  198. printf("\n\n==============================================\n"
  199. "Sample 03: C: print out all converters\n");
  200. int32_t count;
  201. int32_t i;
  202. // **************************** START SAMPLE *******************
  203. count = ucnv_countAvailable();
  204. printf("Available converters: %d\n", count);
  205. for(i=0;i<count;i++)
  206. {
  207. printf("%s ", ucnv_getAvailableName(i));
  208. }
  209. // ***************************** END SAMPLE ********************
  210. printf("\n");
  211. return U_ZERO_ERROR;
  212. }
  213. #define BUFFERSIZE 17 /* make it interesting :) */
  214. /*
  215. Converting from a codepage to Unicode in bulk..
  216. What is the best way to determine the buffer size?
  217. The 'buffersize' is in bytes of input.
  218. For a given converter, divinding this by the minimum char size
  219. give you the maximum number of Unicode characters that could be
  220. expected for a given number of input bytes.
  221. see: ucnv_getMinCharSize()
  222. For example, a single byte codepage like 'Latin-3' has a
  223. minimum char size of 1. (It takes at least 1 byte to represent
  224. each Unicode char.) So the unicode buffer has the same number of
  225. UChars as the input buffer has bytes.
  226. In a strictly double byte codepage such as cp1362 (Windows
  227. Korean), the minimum char size is 2. So, only half as many Unicode
  228. chars as bytes are needed.
  229. This work to calculate the buffer size is an optimization. Any
  230. size of input and output buffer can be used, as long as the
  231. program handles the following cases: If the input buffer is empty,
  232. the source pointer will be equal to sourceLimit. If the output
  233. buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned.
  234. */
  235. UErrorCode convsample_05()
  236. {
  237. printf("\n\n==============================================\n"
  238. "Sample 05: C: count the number of letters in a UTF-8 document\n");
  239. FILE *f;
  240. int32_t count;
  241. char inBuf[BUFFERSIZE];
  242. const char *source;
  243. const char *sourceLimit;
  244. UChar *uBuf;
  245. UChar *target;
  246. UChar *targetLimit;
  247. UChar *p;
  248. int32_t uBufSize = 0;
  249. UConverter *conv;
  250. UErrorCode status = U_ZERO_ERROR;
  251. uint32_t letters=0, total=0;
  252. f = fopen("data01.txt", "r");
  253. if(!f)
  254. {
  255. fprintf(stderr, "Couldn't open file 'data01.txt' (UTF-8 data file).\n");
  256. return U_FILE_ACCESS_ERROR;
  257. }
  258. // **************************** START SAMPLE *******************
  259. conv = ucnv_open("utf-8", &status);
  260. assert(U_SUCCESS(status));
  261. uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
  262. printf("input bytes %d / min chars %d = %d UChars\n",
  263. BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
  264. uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
  265. assert(uBuf!=NULL);
  266. // grab another buffer's worth
  267. while((!feof(f)) &&
  268. ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
  269. {
  270. // Convert bytes to unicode
  271. source = inBuf;
  272. sourceLimit = inBuf + count;
  273. do
  274. {
  275. target = uBuf;
  276. targetLimit = uBuf + uBufSize;
  277. ucnv_toUnicode(conv, &target, targetLimit,
  278. &source, sourceLimit, NULL,
  279. feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
  280. /* is true (when no more data will come) */
  281. &status);
  282. if(status == U_BUFFER_OVERFLOW_ERROR)
  283. {
  284. // simply ran out of space - we'll reset the target ptr the next
  285. // time through the loop.
  286. status = U_ZERO_ERROR;
  287. }
  288. else
  289. {
  290. // Check other errors here.
  291. assert(U_SUCCESS(status));
  292. // Break out of the loop (by force)
  293. }
  294. // Process the Unicode
  295. // Todo: handle UTF-16/surrogates
  296. for(p = uBuf; p<target; p++)
  297. {
  298. if(u_isalpha(*p))
  299. letters++;
  300. total++;
  301. }
  302. } while (source < sourceLimit); // while simply out of space
  303. }
  304. printf("%d letters out of %d total UChars.\n", letters, total);
  305. // ***************************** END SAMPLE ********************
  306. ucnv_close(conv);
  307. printf("\n");
  308. fclose(f);
  309. return U_ZERO_ERROR;
  310. }
  311. #undef BUFFERSIZE
  312. #define BUFFERSIZE 1024
  313. typedef struct
  314. {
  315. UChar32 codepoint;
  316. uint32_t frequency;
  317. } CharFreqInfo;
  318. UErrorCode convsample_06()
  319. {
  320. printf("\n\n==============================================\n"
  321. "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
  322. FILE *f;
  323. int32_t count;
  324. char inBuf[BUFFERSIZE];
  325. const char *source;
  326. const char *sourceLimit;
  327. int32_t uBufSize = 0;
  328. UConverter *conv;
  329. UErrorCode status = U_ZERO_ERROR;
  330. uint32_t letters=0, total=0;
  331. CharFreqInfo *info;
  332. UChar32 charCount = 0x10000; /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
  333. UChar32 p;
  334. uint32_t ie = 0;
  335. uint32_t gh = 0;
  336. UChar32 l = 0;
  337. f = fopen("data06.txt", "r");
  338. if(!f)
  339. {
  340. fprintf(stderr, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
  341. return U_FILE_ACCESS_ERROR;
  342. }
  343. info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
  344. if(!info)
  345. {
  346. fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo)*charCount);
  347. }
  348. /* reset frequencies */
  349. for(p=0;p<charCount;p++)
  350. {
  351. info[p].codepoint = p;
  352. info[p].frequency = 0;
  353. }
  354. // **************************** START SAMPLE *******************
  355. conv = ucnv_open("utf-8", &status);
  356. assert(U_SUCCESS(status));
  357. uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
  358. printf("input bytes %d / min chars %d = %d UChars\n",
  359. BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
  360. // grab another buffer's worth
  361. while((!feof(f)) &&
  362. ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
  363. {
  364. // Convert bytes to unicode
  365. source = inBuf;
  366. sourceLimit = inBuf + count;
  367. while(source < sourceLimit)
  368. {
  369. p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
  370. if(U_FAILURE(status))
  371. {
  372. fprintf(stderr, "%s @ %d\n", u_errorName(status), total);
  373. status = U_ZERO_ERROR;
  374. continue;
  375. }
  376. U_ASSERT(status);
  377. total++;
  378. if(u_isalpha(p))
  379. letters++;
  380. if((u_tolower(l) == 'i') && (u_tolower(p) == 'e'))
  381. ie++;
  382. if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127))
  383. gh++;
  384. if(p>charCount)
  385. {
  386. fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
  387. free(info);
  388. fclose(f);
  389. ucnv_close(conv);
  390. return U_UNSUPPORTED_ERROR;
  391. }
  392. info[p].frequency++;
  393. l = p;
  394. }
  395. }
  396. fclose(f);
  397. ucnv_close(conv);
  398. printf("%d letters out of %d total UChars.\n", letters, total);
  399. printf("%d ie digraphs, %d gh digraphs.\n", ie, gh);
  400. // now, we could sort it..
  401. // qsort(info, charCount, sizeof(info[0]), charfreq_compare);
  402. for(p=0;p<charCount;p++)
  403. {
  404. if(info[p].frequency)
  405. {
  406. printf("% 5d U+%06X ", info[p].frequency, p);
  407. if(p <= 0xFFFF)
  408. {
  409. prettyPrintUChar((UChar)p);
  410. }
  411. printf("\n");
  412. }
  413. }
  414. free(info);
  415. // ***************************** END SAMPLE ********************
  416. printf("\n");
  417. return U_ZERO_ERROR;
  418. }
  419. #undef BUFFERSIZE
  420. /******************************************************
  421. You must call ucnv_close to clean up the memory used by the
  422. converter.
  423. 'len' returns the number of OUTPUT UChars resulting from the
  424. conversion.
  425. */
  426. UErrorCode convsample_12()
  427. {
  428. printf("\n\n==============================================\n"
  429. "Sample 12: C: simple sjis -> unicode conversion\n");
  430. // **************************** START SAMPLE *******************
  431. char source[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
  432. UChar target[100];
  433. UErrorCode status = U_ZERO_ERROR;
  434. UConverter *conv;
  435. int32_t len;
  436. // set up the converter
  437. conv = ucnv_open("shift_jis", &status);
  438. assert(U_SUCCESS(status));
  439. // convert to Unicode
  440. // Note: we can use strlen, we know it's an 8 bit null terminated codepage
  441. target[6] = 0xFDCA;
  442. len = ucnv_toUChars(conv, target, 100, source, strlen(source), &status);
  443. U_ASSERT(status);
  444. // close the converter
  445. ucnv_close(conv);
  446. // ***************************** END SAMPLE ********************
  447. // Print it out
  448. printBytes("src", source, strlen(source) );
  449. printf("\n");
  450. printUChars("targ", target, len);
  451. return U_ZERO_ERROR;
  452. }
  453. /******************************************************************
  454. C: Convert from codepage to Unicode one at a time.
  455. */
  456. UErrorCode convsample_13()
  457. {
  458. printf("\n\n==============================================\n"
  459. "Sample 13: C: simple Big5 -> unicode conversion, char at a time\n");
  460. const char sourceChars[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e };
  461. // const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e };
  462. const char *source, *sourceLimit;
  463. UChar32 target;
  464. UErrorCode status = U_ZERO_ERROR;
  465. UConverter *conv = NULL;
  466. int32_t srcCount=0;
  467. int32_t dstCount=0;
  468. srcCount = sizeof(sourceChars);
  469. conv = ucnv_open("Big5", &status);
  470. U_ASSERT(status);
  471. source = sourceChars;
  472. sourceLimit = sourceChars + sizeof(sourceChars);
  473. // **************************** START SAMPLE *******************
  474. printBytes("src",source,sourceLimit-source);
  475. while(source < sourceLimit)
  476. {
  477. puts("");
  478. target = ucnv_getNextUChar (conv,
  479. &source,
  480. sourceLimit,
  481. &status);
  482. // printBytes("src",source,sourceLimit-source);
  483. U_ASSERT(status);
  484. printUChar(target);
  485. dstCount++;
  486. }
  487. // ************************** END SAMPLE *************************
  488. printf("src=%d bytes, dst=%d uchars\n", srcCount, dstCount);
  489. ucnv_close(conv);
  490. return U_ZERO_ERROR;
  491. }
  492. UBool convsample_20_didSubstitute(const char *source)
  493. {
  494. UChar uchars[100];
  495. char bytes[100];
  496. UConverter *conv = NULL;
  497. UErrorCode status = U_ZERO_ERROR;
  498. uint32_t len, len2;
  499. UBool flagVal;
  500. FromUFLAGContext * context = NULL;
  501. printf("\n\n==============================================\n"
  502. "Sample 20: C: Test for substitution using callbacks\n");
  503. /* print out the original source */
  504. printBytes("src", source);
  505. printf("\n");
  506. /* First, convert from UTF8 to unicode */
  507. conv = ucnv_open("utf-8", &status);
  508. U_ASSERT(status);
  509. len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
  510. U_ASSERT(status);
  511. printUChars("uch", uchars, len);
  512. printf("\n");
  513. /* Now, close the converter */
  514. ucnv_close(conv);
  515. /* Now, convert to windows-1252 */
  516. conv = ucnv_open("windows-1252", &status);
  517. U_ASSERT(status);
  518. /* Converter starts out with the SUBSTITUTE callback set. */
  519. /* initialize our callback */
  520. context = flagCB_fromU_openContext();
  521. /* Set our special callback */
  522. ucnv_setFromUCallBack(conv,
  523. flagCB_fromU,
  524. context,
  525. &(context->subCallback),
  526. &(context->subContext),
  527. &status);
  528. U_ASSERT(status);
  529. len2 = ucnv_fromUChars(conv, bytes, 100, uchars, len, &status);
  530. U_ASSERT(status);
  531. flagVal = context->flag; /* it's about to go away when we close the cnv */
  532. ucnv_close(conv);
  533. /* print out the original source */
  534. printBytes("bytes", bytes, len2);
  535. return flagVal; /* true if callback was called */
  536. }
  537. UErrorCode convsample_20()
  538. {
  539. const char *sample1 = "abc\xdf\xbf";
  540. const char *sample2 = "abc_def";
  541. if(convsample_20_didSubstitute(sample1))
  542. {
  543. printf("DID substitute.\n******\n");
  544. }
  545. else
  546. {
  547. printf("Did NOT substitute.\n*****\n");
  548. }
  549. if(convsample_20_didSubstitute(sample2))
  550. {
  551. printf("DID substitute.\n******\n");
  552. }
  553. else
  554. {
  555. printf("Did NOT substitute.\n*****\n");
  556. }
  557. return U_ZERO_ERROR;
  558. }
  559. // 21 - C, callback, with clone and debug
  560. UBool convsample_21_didSubstitute(const char *source)
  561. {
  562. UChar uchars[100];
  563. char bytes[100];
  564. UConverter *conv = NULL, *cloneCnv = NULL;
  565. UErrorCode status = U_ZERO_ERROR;
  566. uint32_t len, len2;
  567. int32_t cloneLen;
  568. UBool flagVal = FALSE;
  569. UConverterFromUCallback junkCB;
  570. FromUFLAGContext *flagCtx = NULL,
  571. *cloneFlagCtx = NULL;
  572. debugCBContext *debugCtx1 = NULL,
  573. *debugCtx2 = NULL,
  574. *cloneDebugCtx = NULL;
  575. printf("\n\n==============================================\n"
  576. "Sample 21: C: Test for substitution w/ callbacks & clones \n");
  577. /* print out the original source */
  578. printBytes("src", source);
  579. printf("\n");
  580. /* First, convert from UTF8 to unicode */
  581. conv = ucnv_open("utf-8", &status);
  582. U_ASSERT(status);
  583. len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
  584. U_ASSERT(status);
  585. printUChars("uch", uchars, len);
  586. printf("\n");
  587. /* Now, close the converter */
  588. ucnv_close(conv);
  589. /* Now, convert to windows-1252 */
  590. conv = ucnv_open("windows-1252", &status);
  591. U_ASSERT(status);
  592. /* Converter starts out with the SUBSTITUTE callback set. */
  593. /* initialize our callback */
  594. /* from the 'bottom' innermost, out
  595. * CNV -> debugCtx1[debug] -> flagCtx[flag] -> debugCtx2[debug] */
  596. #if DEBUG_TMI
  597. printf("flagCB_fromU = %p\n", &flagCB_fromU);
  598. printf("debugCB_fromU = %p\n", &debugCB_fromU);
  599. #endif
  600. debugCtx1 = debugCB_openContext();
  601. flagCtx = flagCB_fromU_openContext();
  602. debugCtx2 = debugCB_openContext();
  603. debugCtx1->subCallback = flagCB_fromU; /* debug1 -> flag */
  604. debugCtx1->subContext = flagCtx;
  605. flagCtx->subCallback = debugCB_fromU; /* flag -> debug2 */
  606. flagCtx->subContext = debugCtx2;
  607. debugCtx2->subCallback = UCNV_FROM_U_CALLBACK_SUBSTITUTE;
  608. debugCtx2->subContext = NULL;
  609. /* Set our special callback */
  610. ucnv_setFromUCallBack(conv,
  611. debugCB_fromU,
  612. debugCtx1,
  613. &(debugCtx2->subCallback),
  614. &(debugCtx2->subContext),
  615. &status);
  616. U_ASSERT(status);
  617. #if DEBUG_TMI
  618. printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n",
  619. conv, debugCtx1, debugCtx1->subCallback,
  620. debugCtx1->subContext, flagCtx, debugCtx2, debugCtx2->subCallback);
  621. #endif
  622. cloneLen = 1; /* but passing in null so it will clone */
  623. cloneCnv = ucnv_safeClone(conv, NULL, &cloneLen, &status);
  624. U_ASSERT(status);
  625. #if DEBUG_TMI
  626. printf("Cloned converter from %p -> %p. Closing %p.\n", conv, cloneCnv, conv);
  627. #endif
  628. ucnv_close(conv);
  629. #if DEBUG_TMI
  630. printf("%p closed.\n", conv);
  631. #endif
  632. U_ASSERT(status);
  633. /* Now, we have to extract the context */
  634. cloneDebugCtx = NULL;
  635. cloneFlagCtx = NULL;
  636. ucnv_getFromUCallBack(cloneCnv, &junkCB, (const void **)&cloneDebugCtx);
  637. if(cloneDebugCtx != NULL) {
  638. cloneFlagCtx = (FromUFLAGContext*) cloneDebugCtx -> subContext;
  639. }
  640. printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n",
  641. cloneCnv, cloneDebugCtx, cloneFlagCtx, cloneFlagCtx?cloneFlagCtx->subContext:NULL );
  642. len2 = ucnv_fromUChars(cloneCnv, bytes, 100, uchars, len, &status);
  643. U_ASSERT(status);
  644. if(cloneFlagCtx != NULL) {
  645. flagVal = cloneFlagCtx->flag; /* it's about to go away when we close the cnv */
  646. } else {
  647. printf("** Warning, couldn't get the subcallback \n");
  648. }
  649. ucnv_close(cloneCnv);
  650. /* print out the original source */
  651. printBytes("bytes", bytes, len2);
  652. return flagVal; /* true if callback was called */
  653. }
  654. UErrorCode convsample_21()
  655. {
  656. const char *sample1 = "abc\xdf\xbf";
  657. const char *sample2 = "abc_def";
  658. if(convsample_21_didSubstitute(sample1))
  659. {
  660. printf("DID substitute.\n******\n");
  661. }
  662. else
  663. {
  664. printf("Did NOT substitute.\n*****\n");
  665. }
  666. if(convsample_21_didSubstitute(sample2))
  667. {
  668. printf("DID substitute.\n******\n");
  669. }
  670. else
  671. {
  672. printf("Did NOT substitute.\n*****\n");
  673. }
  674. return U_ZERO_ERROR;
  675. }
  676. // 40- C, cp37 -> UTF16 [data02.bin -> data40.utf16]
  677. #define BUFFERSIZE 17 /* make it interesting :) */
  678. UErrorCode convsample_40()
  679. {
  680. printf("\n\n==============================================\n"
  681. "Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n");
  682. FILE *f;
  683. FILE *out;
  684. int32_t count;
  685. char inBuf[BUFFERSIZE];
  686. const char *source;
  687. const char *sourceLimit;
  688. UChar *uBuf;
  689. UChar *target;
  690. UChar *targetLimit;
  691. int32_t uBufSize = 0;
  692. UConverter *conv = NULL;
  693. UErrorCode status = U_ZERO_ERROR;
  694. uint32_t inbytes=0, total=0;
  695. f = fopen("data02.bin", "rb");
  696. if(!f)
  697. {
  698. fprintf(stderr, "Couldn't open file 'data02.bin' (cp37 data file).\n");
  699. return U_FILE_ACCESS_ERROR;
  700. }
  701. out = fopen("data40.utf16", "wb");
  702. if(!out)
  703. {
  704. fprintf(stderr, "Couldn't create file 'data40.utf16'.\n");
  705. fclose(f);
  706. return U_FILE_ACCESS_ERROR;
  707. }
  708. // **************************** START SAMPLE *******************
  709. conv = ucnv_openCCSID(37, UCNV_IBM, &status);
  710. assert(U_SUCCESS(status));
  711. uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
  712. printf("input bytes %d / min chars %d = %d UChars\n",
  713. BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
  714. uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
  715. assert(uBuf!=NULL);
  716. // grab another buffer's worth
  717. while((!feof(f)) &&
  718. ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
  719. {
  720. inbytes += count;
  721. // Convert bytes to unicode
  722. source = inBuf;
  723. sourceLimit = inBuf + count;
  724. do
  725. {
  726. target = uBuf;
  727. targetLimit = uBuf + uBufSize;
  728. ucnv_toUnicode( conv, &target, targetLimit,
  729. &source, sourceLimit, NULL,
  730. feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
  731. /* is true (when no more data will come) */
  732. &status);
  733. if(status == U_BUFFER_OVERFLOW_ERROR)
  734. {
  735. // simply ran out of space - we'll reset the target ptr the next
  736. // time through the loop.
  737. status = U_ZERO_ERROR;
  738. }
  739. else
  740. {
  741. // Check other errors here.
  742. assert(U_SUCCESS(status));
  743. // Break out of the loop (by force)
  744. }
  745. // Process the Unicode
  746. // Todo: handle UTF-16/surrogates
  747. assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
  748. (size_t)(target-uBuf));
  749. total += (target-uBuf);
  750. } while (source < sourceLimit); // while simply out of space
  751. }
  752. printf("%d bytes in, %d UChars out.\n", inbytes, total);
  753. // ***************************** END SAMPLE ********************
  754. ucnv_close(conv);
  755. fclose(f);
  756. fclose(out);
  757. printf("\n");
  758. return U_ZERO_ERROR;
  759. }
  760. #undef BUFFERSIZE
  761. // 46- C, UTF16 -> latin2 [data40.utf16 -> data46.out]
  762. #define BUFFERSIZE 24 /* make it interesting :) */
  763. UErrorCode convsample_46()
  764. {
  765. printf("\n\n==============================================\n"
  766. "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n");
  767. FILE *f;
  768. FILE *out;
  769. int32_t count;
  770. UChar inBuf[BUFFERSIZE];
  771. const UChar *source;
  772. const UChar *sourceLimit;
  773. char *buf;
  774. char *target;
  775. char *targetLimit;
  776. int32_t bufSize = 0;
  777. UConverter *conv = NULL;
  778. UErrorCode status = U_ZERO_ERROR;
  779. uint32_t inchars=0, total=0;
  780. f = fopen("data40.utf16", "rb");
  781. if(!f)
  782. {
  783. fprintf(stderr, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n");
  784. return U_FILE_ACCESS_ERROR;
  785. }
  786. out = fopen("data46.out", "wb");
  787. if(!out)
  788. {
  789. fprintf(stderr, "Couldn't create file 'data46.out'.\n");
  790. fclose(f);
  791. return U_FILE_ACCESS_ERROR;
  792. }
  793. // **************************** START SAMPLE *******************
  794. conv = ucnv_open( "iso-8859-2", &status);
  795. assert(U_SUCCESS(status));
  796. bufSize = (BUFFERSIZE*ucnv_getMaxCharSize(conv));
  797. printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
  798. BUFFERSIZE, ucnv_getMaxCharSize(conv), bufSize);
  799. buf = (char*)malloc(bufSize * sizeof(char));
  800. assert(buf!=NULL);
  801. // grab another buffer's worth
  802. while((!feof(f)) &&
  803. ((count=fread(inBuf, sizeof(UChar), BUFFERSIZE , f)) > 0) )
  804. {
  805. inchars += count;
  806. // Convert bytes to unicode
  807. source = inBuf;
  808. sourceLimit = inBuf + count;
  809. do
  810. {
  811. target = buf;
  812. targetLimit = buf + bufSize;
  813. ucnv_fromUnicode( conv, &target, targetLimit,
  814. &source, sourceLimit, NULL,
  815. feof(f)?TRUE:FALSE, /* pass 'flush' when eof */
  816. /* is true (when no more data will come) */
  817. &status);
  818. if(status == U_BUFFER_OVERFLOW_ERROR)
  819. {
  820. // simply ran out of space - we'll reset the target ptr the next
  821. // time through the loop.
  822. status = U_ZERO_ERROR;
  823. }
  824. else
  825. {
  826. // Check other errors here.
  827. assert(U_SUCCESS(status));
  828. // Break out of the loop (by force)
  829. }
  830. // Process the Unicode
  831. assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) ==
  832. (size_t)(target-buf));
  833. total += (target-buf);
  834. } while (source < sourceLimit); // while simply out of space
  835. }
  836. printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, inchars * sizeof(UChar), total);
  837. // ***************************** END SAMPLE ********************
  838. ucnv_close(conv);
  839. fclose(f);
  840. fclose(out);
  841. printf("\n");
  842. return U_ZERO_ERROR;
  843. }
  844. #undef BUFFERSIZE
  845. #define BUFFERSIZE 219
  846. /* main */
  847. int main()
  848. {
  849. printf("Default Converter=%s\n", ucnv_getDefaultName() );
  850. convsample_02(); // C , u->koi8r, conv
  851. convsample_03(); // C, iterate
  852. convsample_05(); // C, utf8->u, getNextUChar
  853. convsample_06(); // C freq counter thingy
  854. convsample_12(); // C, sjis->u, conv
  855. convsample_13(); // C, big5->u, getNextU
  856. convsample_20(); // C, callback
  857. convsample_21(); // C, callback debug
  858. convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
  859. convsample_46(); // C, UTF16 -> latin3 [data41.utf16 -> data46.out]
  860. printf("End of converter samples.\n");
  861. fflush(stdout);
  862. fflush(stderr);
  863. return 0;
  864. }