PageRenderTime 51ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/tests/testboundaries_ucd.c

https://gitlab.com/ImageMagick/pango
C | 436 lines | 334 code | 69 blank | 33 comment | 35 complexity | 72b523b456ad92779626b222ed1d9123 MD5 | raw file
Possible License(s): LGPL-2.0
  1. /* Pango
  2. * testboundaries_ucd.c: Test text boundary algorithms with test data from
  3. * Unicode Character Database.
  4. *
  5. * Copyright (C) 2003 Noah Levitt
  6. *
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Library General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2 of the License, or (at your option) any later version.
  11. *
  12. * This library is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Library General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Library General Public
  18. * License along with this library; if not, write to the
  19. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20. * Boston, MA 02111-1307, USA.
  21. */
  22. #include <pango/pango.h>
  23. #include <stdlib.h>
  24. #include <string.h>
  25. #include <locale.h>
  26. static gboolean failed = FALSE;
  27. /* PangoLogAttr has to be the same size as guint or this hack breaks */
  28. typedef union
  29. {
  30. PangoLogAttr attr;
  31. guint bits;
  32. }
  33. AttrBits;
  34. /* counts the number of multiplication and divison signs up to the first
  35. * '#' or null character */
  36. static gint
  37. count_attrs (gchar *line)
  38. {
  39. gunichar ch;
  40. gchar *p = line;
  41. gint count = 0;
  42. for (;;)
  43. {
  44. ch = g_utf8_get_char (p);
  45. switch (ch)
  46. {
  47. /* MULTIPLICATION SIGN, DIVISION SIGN */
  48. case 0x00d7: case 0x00f7:
  49. count++;
  50. break;
  51. /* null char, NUMBER SIGN */
  52. case 0x0000: case 0x0023:
  53. return count;
  54. default:
  55. break;
  56. }
  57. p = g_utf8_next_char (p);
  58. }
  59. /* not reached */
  60. }
  61. static gboolean
  62. parse_line (gchar *line,
  63. AttrBits bits,
  64. gchar **str_return,
  65. PangoLogAttr **attr_return,
  66. gint *num_attrs)
  67. {
  68. GString *gs;
  69. gunichar ch, character;
  70. gchar *p, *q;
  71. gint i;
  72. AttrBits temp_attr;
  73. *num_attrs = count_attrs (line);
  74. *attr_return = g_new (PangoLogAttr, *num_attrs);
  75. p = line;
  76. i = 0;
  77. gs = g_string_new (NULL);
  78. for (;;)
  79. {
  80. temp_attr.bits = 0;
  81. /* skip white space */
  82. do
  83. {
  84. ch = g_utf8_get_char (p);
  85. p = g_utf8_next_char (p);
  86. }
  87. while (g_unichar_isspace (ch));
  88. switch (ch)
  89. {
  90. case 0x00f7: /* DIVISION SIGN: boundary here */
  91. temp_attr.bits |= bits.bits;
  92. /* fall through */
  93. case 0x00d7: /* MULTIPLICATION SIGN: no boundary here */
  94. break;
  95. case 0x0000:
  96. case 0x0023:
  97. *str_return = g_string_free (gs, FALSE);
  98. return TRUE;
  99. default: /* unexpected character */
  100. g_free (*attr_return);
  101. return FALSE;
  102. }
  103. (*attr_return)[i] = temp_attr.attr;
  104. /* skip white space */
  105. do
  106. {
  107. ch = g_utf8_get_char (p);
  108. p = g_utf8_next_char (p);
  109. }
  110. while (g_unichar_isspace (ch));
  111. p = g_utf8_prev_char (p);
  112. if (ch == 0x0023 || ch == 0x0000)
  113. {
  114. *str_return = g_string_free (gs, FALSE);
  115. return TRUE;
  116. }
  117. character = strtoul (p, &q, 16);
  118. if (q < p + 4 || q > p + 6 || character > 0x10ffff)
  119. {
  120. g_free (*attr_return);
  121. return FALSE;
  122. }
  123. p = q;
  124. gs = g_string_append_unichar (gs, character);
  125. i++;
  126. }
  127. }
  128. static gboolean
  129. attrs_equal (PangoLogAttr *attrs1,
  130. PangoLogAttr *attrs2,
  131. gint len,
  132. AttrBits bits)
  133. {
  134. AttrBits a, b;
  135. gint i;
  136. for (i = 0; i < len; i++)
  137. {
  138. a.bits = 0;
  139. a.attr = attrs1[i];
  140. b.bits = 0;
  141. b.attr = attrs2[i];
  142. /* can't do a straight comparison because the bitmask may have
  143. * multiple bits set, and as long as attr&bitmask is not zero, it
  144. * counts as being set */
  145. if (((a.bits & bits.bits) && !(b.bits & bits.bits)) ||
  146. (!(a.bits & bits.bits) && (b.bits & bits.bits)))
  147. return FALSE;
  148. }
  149. return TRUE;
  150. }
  151. static gchar *
  152. make_test_string (gchar *string,
  153. PangoLogAttr *attrs,
  154. AttrBits bits)
  155. {
  156. GString *gs = g_string_new (NULL);
  157. gint i = 0;
  158. AttrBits a;
  159. gchar *p = string;
  160. gunichar ch;
  161. for (;;)
  162. {
  163. a.bits = 0;
  164. a.attr = attrs[i];
  165. if ((a.bits & bits.bits) != 0)
  166. gs = g_string_append_unichar (gs, 0x00f7);
  167. else
  168. gs = g_string_append_unichar (gs, 0x00d7);
  169. g_string_append_c (gs, ' ');
  170. if (*p == '\0')
  171. break;
  172. ch = g_utf8_get_char (p);
  173. g_string_append_printf (gs, "%04X ", ch);
  174. p = g_utf8_next_char (p);
  175. i++;
  176. }
  177. return g_string_free (gs, FALSE);
  178. }
  179. static void
  180. do_test (const gchar *filename,
  181. AttrBits bits)
  182. {
  183. GIOChannel *channel;
  184. GIOStatus status;
  185. gchar *line;
  186. gsize length, terminator_pos;
  187. GError *error;
  188. gchar *string;
  189. PangoLogAttr *expected_attrs;
  190. gint num_attrs;
  191. gint i;
  192. error = NULL;
  193. channel = g_io_channel_new_file (filename, "r", &error);
  194. if (!channel)
  195. {
  196. if (error->domain == G_FILE_ERROR && error->code == G_FILE_ERROR_NOENT)
  197. {
  198. g_print ("%s not found. Skipping test.\n", filename);
  199. goto done;
  200. }
  201. else
  202. {
  203. g_printerr ("%s: %s\n", filename, error->message);
  204. exit (1);
  205. }
  206. }
  207. g_print ("Testing %s.\n", filename);
  208. i = 1;
  209. for (;;)
  210. {
  211. error = NULL;
  212. status = g_io_channel_read_line (channel, &line, &length, &terminator_pos, &error);
  213. switch (status)
  214. {
  215. case G_IO_STATUS_ERROR:
  216. g_printerr ("%s: %s\n", filename, error->message);
  217. exit (1);
  218. case G_IO_STATUS_EOF:
  219. goto done;
  220. case G_IO_STATUS_AGAIN:
  221. continue;
  222. case G_IO_STATUS_NORMAL:
  223. line[terminator_pos] = '\0';
  224. break;
  225. }
  226. if (! parse_line (line, bits, &string, &expected_attrs, &num_attrs))
  227. {
  228. g_printerr ("%s: error parsing line %d: %s\n", filename, i, line);
  229. exit (1);
  230. }
  231. if (num_attrs > 0)
  232. {
  233. PangoLogAttr *attrs = g_new (PangoLogAttr, num_attrs);
  234. pango_get_log_attrs (string, -1, 0, pango_language_from_string ("C"), attrs, num_attrs);
  235. if (! attrs_equal (attrs, expected_attrs, num_attrs, bits))
  236. {
  237. gchar *str = make_test_string (string, attrs, bits);
  238. gchar *comments = strchr (line, '#');
  239. if (comments) /* don't print the # comment in the error message. print it separately */
  240. {
  241. *comments = '\0';
  242. comments++;
  243. }
  244. else
  245. {
  246. comments = "";
  247. }
  248. g_printerr ("%s: line %d failed\n"
  249. " expected: %s\n"
  250. " returned: %s\n"
  251. " comments: %s\n\n",
  252. filename, i, line, str, comments);
  253. g_free (str);
  254. failed = TRUE;
  255. }
  256. g_free (attrs);
  257. }
  258. g_free (string);
  259. g_free (expected_attrs);
  260. i++;
  261. }
  262. done:
  263. if (channel)
  264. g_io_channel_unref (channel);
  265. if (error)
  266. g_error_free (error);
  267. g_assert (!failed);
  268. }
  269. static void
  270. test_grapheme_break (void)
  271. {
  272. const gchar *filename;
  273. AttrBits bits;
  274. #if GLIB_CHECK_VERSION(2, 37, 2)
  275. filename = g_test_get_filename (G_TEST_DIST, "GraphemeBreakTest.txt", NULL);
  276. #else
  277. filename = SRCDIR "/GraphemeBreakTest.txt";
  278. #endif
  279. bits.bits = 0;
  280. bits.attr.is_cursor_position = 1;
  281. do_test (filename, bits);
  282. }
  283. static void
  284. test_emoji_break (void)
  285. {
  286. const gchar *filename;
  287. AttrBits bits;
  288. #if GLIB_CHECK_VERSION(2, 37, 2)
  289. filename = g_test_get_filename (G_TEST_DIST, "EmojiBreakTest.txt", NULL);
  290. #else
  291. filename = SRCDIR "/EmojiBreakTest.txt";
  292. #endif
  293. bits.bits = 0;
  294. bits.attr.is_cursor_position = 1;
  295. do_test (filename, bits);
  296. }
  297. static void
  298. test_char_break (void)
  299. {
  300. const gchar *filename;
  301. AttrBits bits;
  302. #if GLIB_CHECK_VERSION(2, 37, 2)
  303. filename = g_test_get_filename (G_TEST_DIST, "CharBreakTest.txt", NULL);
  304. #else
  305. filename = SRCDIR "/CharBreakTest.txt";
  306. #endif
  307. bits.bits = 0;
  308. bits.attr.is_char_break = 1;
  309. do_test (filename, bits);
  310. }
  311. static void
  312. test_word_break (void)
  313. {
  314. const gchar *filename;
  315. AttrBits bits;
  316. #if GLIB_CHECK_VERSION(2, 37, 2)
  317. filename = g_test_get_filename (G_TEST_DIST, "WordBreakTest.txt", NULL);
  318. #else
  319. filename = SRCDIR "/WordBreakTest.txt";
  320. #endif
  321. bits.bits = 0;
  322. bits.attr.is_word_boundary = 1;
  323. do_test (filename, bits);
  324. }
  325. static void
  326. test_sentence_break (void)
  327. {
  328. const gchar *filename;
  329. AttrBits bits;
  330. #if GLIB_CHECK_VERSION(2, 37, 2)
  331. filename = g_test_get_filename (G_TEST_DIST, "SentenceBreakTest.txt", NULL);
  332. #else
  333. filename = SRCDIR "/SentenceBreakTest.txt";
  334. #endif
  335. bits.bits = 0;
  336. bits.attr.is_sentence_boundary = 1;
  337. do_test (filename, bits);
  338. }
  339. static void
  340. test_line_break (void)
  341. {
  342. const gchar *filename;
  343. AttrBits bits;
  344. #if GLIB_CHECK_VERSION(2, 37, 2)
  345. filename = g_test_get_filename (G_TEST_DIST, "LineBreakTest.txt", NULL);
  346. #else
  347. filename = SRCDIR "/LineBreakTest.txt";
  348. #endif
  349. bits.bits = 0;
  350. bits.attr.is_line_break = 1;
  351. bits.attr.is_mandatory_break = 1;
  352. do_test (filename, bits);
  353. }
  354. gint
  355. main (gint argc,
  356. gchar **argv)
  357. {
  358. setlocale (LC_ALL, "");
  359. g_test_init (&argc, &argv, NULL);
  360. g_test_add_func ("/text/break/grapheme", test_grapheme_break);
  361. g_test_add_func ("/text/break/word", test_word_break);
  362. g_test_add_func ("/text/break/sentence", test_sentence_break);
  363. g_test_add_func ("/text/break/line", test_line_break);
  364. g_test_add_func ("/text/break/emoji", test_emoji_break);
  365. g_test_add_func ("/text/break/char", test_char_break);
  366. return g_test_run ();
  367. }